Merge branch 'master' of ssh://mason@master.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

Chris Mason, 17 years ago
commit 26ce34a9c4
56 changed files with 36787 additions and 0 deletions
  1. fs/Kconfig (+17 -0)
  2. fs/Makefile (+1 -0)
  3. fs/btrfs/COPYING (+356 -0)
  4. fs/btrfs/INSTALL (+48 -0)
  5. fs/btrfs/Makefile (+24 -0)
  6. fs/btrfs/acl.c (+352 -0)
  7. fs/btrfs/async-thread.c (+357 -0)
  8. fs/btrfs/async-thread.h (+85 -0)
  9. fs/btrfs/btrfs_inode.h (+133 -0)
  10. fs/btrfs/compat.h (+24 -0)
  11. fs/btrfs/crc32c.h (+120 -0)
  12. fs/btrfs/ctree.c (+3716 -0)
  13. fs/btrfs/ctree.h (+1891 -0)
  14. fs/btrfs/dir-item.c (+386 -0)
  15. fs/btrfs/disk-io.c (+2078 -0)
  16. fs/btrfs/disk-io.h (+84 -0)
  17. fs/btrfs/export.c (+201 -0)
  18. fs/btrfs/export.h (+19 -0)
  19. fs/btrfs/extent-tree.c (+5253 -0)
  20. fs/btrfs/extent_io.c (+3416 -0)
  21. fs/btrfs/extent_io.h (+248 -0)
  22. fs/btrfs/extent_map.c (+342 -0)
  23. fs/btrfs/extent_map.h (+57 -0)
  24. fs/btrfs/file-item.c (+512 -0)
  25. fs/btrfs/file.c (+1178 -0)
  26. fs/btrfs/free-space-cache.c (+449 -0)
  27. fs/btrfs/hash.h (+27 -0)
  28. fs/btrfs/inode-item.c (+206 -0)
  29. fs/btrfs/inode-map.c (+145 -0)
  30. fs/btrfs/inode.c (+3908 -0)
  31. fs/btrfs/ioctl.c (+863 -0)
  32. fs/btrfs/ioctl.h (+55 -0)
  33. fs/btrfs/locking.c (+87 -0)
  34. fs/btrfs/locking.h (+27 -0)
  35. fs/btrfs/ordered-data.c (+727 -0)
  36. fs/btrfs/ordered-data.h (+149 -0)
  37. fs/btrfs/orphan.c (+67 -0)
  38. fs/btrfs/print-tree.c (+200 -0)
  39. fs/btrfs/print-tree.h (+23 -0)
  40. fs/btrfs/ref-cache.c (+230 -0)
  41. fs/btrfs/ref-cache.h (+77 -0)
  42. fs/btrfs/root-tree.c (+277 -0)
  43. fs/btrfs/struct-funcs.c (+132 -0)
  44. fs/btrfs/super.c (+659 -0)
  45. fs/btrfs/sysfs.c (+268 -0)
  46. fs/btrfs/transaction.c (+1023 -0)
  47. fs/btrfs/transaction.h (+104 -0)
  48. fs/btrfs/tree-defrag.c (+149 -0)
  49. fs/btrfs/tree-log.c (+2890 -0)
  50. fs/btrfs/tree-log.h (+41 -0)
  51. fs/btrfs/version.h (+4 -0)
  52. fs/btrfs/version.sh (+43 -0)
  53. fs/btrfs/volumes.c (+2549 -0)
  54. fs/btrfs/volumes.h (+150 -0)
  55. fs/btrfs/xattr.c (+321 -0)
  56. fs/btrfs/xattr.h (+39 -0)

+ 17 - 0
fs/Kconfig

@@ -497,6 +497,23 @@ config OCFS2_DEBUG_FS
 	  this option for debugging only as it is likely to decrease
 	  performance of the filesystem.
 
+config BTRFS_FS
+	tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
+	depends on EXPERIMENTAL
+	select LIBCRC32C
+	help
+	  Btrfs is a new filesystem with extents, writable snapshotting,
+	  support for multiple devices and many more features.
+
+	  Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
+	  FINALIZED.  You should say N here unless you are interested in
+	  testing Btrfs with non-critical data.
+
+	  To compile this file system support as a module, choose M here. The
+	  module will be called btrfs.
+
+	  If unsure, say N.
+
 endif # BLOCK
 
 config DNOTIFY

+ 1 - 0
fs/Makefile

@@ -121,4 +121,5 @@ obj-$(CONFIG_HOSTFS)		+= hostfs/
 obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
+obj-$(CONFIG_BTRFS_FS)		+= btrfs/
 obj-$(CONFIG_GFS2_FS)           += gfs2/

+ 356 - 0
fs/btrfs/COPYING

@@ -0,0 +1,356 @@
+
+   NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+ Also note that the only valid version of the GPL as far as the kernel
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+			Linus Torvalds
+
+----------------------------------------
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.

+ 48 - 0
fs/btrfs/INSTALL

@@ -0,0 +1,48 @@
+Install Instructions
+
+Btrfs puts snapshots and subvolumes into the root directory of the FS.  This
+directory can only be changed by btrfsctl right now, and normal filesystem
+operations do not work on it.  The default subvolume is called 'default',
+and you can create files and directories in mount_point/default
+
+Btrfs uses libcrc32c in the kernel for file and metadata checksums.  You need
+to compile the kernel with:
+
+CONFIG_LIBCRC32C=m
+
+libcrc32c can be static as well.  Once your kernel is setup, typing make in the
+btrfs module sources will build against the running kernel.  When the build is
+complete:
+
+modprobe libcrc32c
+insmod btrfs.ko
+
+The Btrfs utility programs require libuuid to build.  This can be found
+in the e2fsprogs sources, and is usually available as libuuid or
+e2fsprogs-devel from various distros.
+
+Building the utilities is just make ; make install.  The programs go
+into /usr/local/bin.  The commands available are:
+
+mkfs.btrfs: create a filesystem
+
+btrfsctl: control program to create snapshots and subvolumes:
+
+	mount /dev/sda2 /mnt
+	btrfsctl -s new_subvol_name /mnt
+	btrfsctl -s snapshot_of_default /mnt/default
+	btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
+	btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
+	ls /mnt
+	default snapshot_of_a_snapshot snapshot_of_new_subvol
+	new_subvol_name snapshot_of_default
+
+	Snapshots and subvolumes cannot be deleted right now, but you can
+	rm -rf all the files and directories inside them.
+
+btrfsck: do a limited check of the FS extent trees.
+
+debug-tree: print all of the FS metadata in text form.  Example:
+
+	debug-tree /dev/sda2 >& big_output_file
+

+ 24 - 0
fs/btrfs/Makefile

@@ -0,0 +1,24 @@
+ifneq ($(KERNELRELEASE),)
+# kbuild part of makefile
+
+obj-$(CONFIG_BTRFS_FS) := btrfs.o
+btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+	   file-item.o inode-item.o inode-map.o disk-io.o \
+	   transaction.o inode.o file.o tree-defrag.o \
+	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
+	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o
+else
+
+# Normal Makefile
+
+KERNELDIR := /lib/modules/`uname -r`/build
+all:
+	$(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
+
+modules_install:
+	$(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
+clean:
+	$(MAKE) -C $(KERNELDIR) M=`pwd` clean
+
+endif

+ 352 - 0
fs/btrfs/acl.c

@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2007 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "xattr.h"
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+static void btrfs_update_cached_acl(struct inode *inode,
+				    struct posix_acl **p_acl,
+				    struct posix_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+		posix_acl_release(*p_acl);
+	*p_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+{
+	int size;
+	const char *name;
+	char *value = NULL;
+	struct posix_acl *acl = NULL, **p_acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		p_acl = &BTRFS_I(inode)->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		p_acl = &BTRFS_I(inode)->i_default_acl;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock(&inode->i_lock);
+	if (*p_acl != BTRFS_ACL_NOT_CACHED)
+		acl = posix_acl_dup(*p_acl);
+	spin_unlock(&inode->i_lock);
+
+	if (acl)
+		return acl;
+
+
+	size = __btrfs_getxattr(inode, name, "", 0);
+	if (size > 0) {
+		value = kzalloc(size, GFP_NOFS);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		size = __btrfs_getxattr(inode, name, value, size);
+		if (size > 0) {
+			acl = posix_acl_from_xattr(value, size);
+			btrfs_update_cached_acl(inode, p_acl, acl);
+		}
+		kfree(value);
+	} else if (size == -ENOENT) {
+		acl = NULL;
+		btrfs_update_cached_acl(inode, p_acl, acl);
+	}
+
+	return acl;
+}
+
+static int btrfs_xattr_get_acl(struct inode *inode, int type,
+			       void *value, size_t size)
+{
+	struct posix_acl *acl;
+	int ret = 0;
+
+	acl = btrfs_get_acl(inode, type);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	ret = posix_acl_to_xattr(acl, value, size);
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+/*
+ * Needs to be called with fs_mutex held
+ */
+static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	int ret, size = 0;
+	const char *name;
+	struct posix_acl **p_acl;
+	char *value = NULL;
+	mode_t mode;
+
+	if (acl) {
+		ret = posix_acl_valid(acl);
+		if (ret < 0)
+			return ret;
+		ret = 0;
+	}
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		mode = inode->i_mode;
+		ret = posix_acl_equiv_mode(acl, &mode);
+		if (ret < 0)
+			return ret;
+		ret = 0;
+		inode->i_mode = mode;
+		name = POSIX_ACL_XATTR_ACCESS;
+		p_acl = &BTRFS_I(inode)->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EINVAL : 0;
+		name = POSIX_ACL_XATTR_DEFAULT;
+		p_acl = &BTRFS_I(inode)->i_default_acl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		size = posix_acl_xattr_size(acl->a_count);
+		value = kmalloc(size, GFP_NOFS);
+		if (!value) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = posix_acl_to_xattr(acl, value, size);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = __btrfs_setxattr(inode, name, value, size, 0);
+
+out:
+	if (value)
+		kfree(value);
+
+	if (!ret)
+		btrfs_update_cached_acl(inode, p_acl, acl);
+
+	return ret;
+}
+
+static int btrfs_xattr_set_acl(struct inode *inode, int type,
+			       const void *value, size_t size)
+{
+	int ret = 0;
+	struct posix_acl *acl = NULL;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (acl == NULL) {
+			value = NULL;
+			size = 0;
+		} else if (IS_ERR(acl)) {
+			return PTR_ERR(acl);
+		}
+	}
+
+	ret = btrfs_set_acl(inode, acl, type);
+
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+
+static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
+				      void *value, size_t size)
+{
+	return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
+				      const void *value, size_t size, int flags)
+{
+	return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
+				       void *value, size_t size)
+{
+	return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
+				       const void *value, size_t size, int flags)
+{
+	return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl;
+	int error = -EAGAIN;
+
+	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+	}
+
+	return error;
+}
+
+/*
+ * btrfs_init_acl is already generally called under fs_mutex, so the locking
+ * stuff has been fixed to work with that.  If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+	struct posix_acl *acl = NULL;
+	int ret = 0;
+
+	/* this happens with subvols */
+	if (!dir)
+		return 0;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (IS_POSIXACL(dir)) {
+			acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
+			if (IS_ERR(acl))
+				return PTR_ERR(acl);
+		}
+
+		if (!acl)
+			inode->i_mode &= ~current->fs->umask;
+	}
+
+	if (IS_POSIXACL(dir) && acl) {
+		struct posix_acl *clone;
+		mode_t mode;
+
+		if (S_ISDIR(inode->i_mode)) {
+			ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+			if (ret)
+				goto failed;
+		}
+		clone = posix_acl_clone(acl, GFP_NOFS);
+		ret = -ENOMEM;
+		if (!clone)
+			goto failed;
+
+		mode = inode->i_mode;
+		ret = posix_acl_create_masq(clone, &mode);
+		if (ret >= 0) {
+			inode->i_mode = mode;
+			if (ret > 0) {
+				/* we need an acl */
+				ret = btrfs_set_acl(inode, clone,
+						    ACL_TYPE_ACCESS);
+			}
+		}
+	}
+failed:
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *acl, *clone;
+	int ret = 0;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (!IS_POSIXACL(inode))
+		return 0;
+
+	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	posix_acl_release(acl);
+	if (!clone)
+		return -ENOMEM;
+
+	ret = posix_acl_chmod_masq(clone, inode->i_mode);
+	if (!ret)
+		ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+
+	posix_acl_release(clone);
+
+	return ret;
+}
+
+struct xattr_handler btrfs_xattr_acl_default_handler = {
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
+	.get	= btrfs_xattr_acl_default_get,
+	.set	= btrfs_xattr_acl_default_set,
+};
+
+struct xattr_handler btrfs_xattr_acl_access_handler = {
+	.prefix = POSIX_ACL_XATTR_ACCESS,
+	.get	= btrfs_xattr_acl_access_get,
+	.set	= btrfs_xattr_acl_access_set,
+};
+
+#else /* CONFIG_FS_POSIX_ACL */
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+	return 0;
+}
+
+#endif /* CONFIG_FS_POSIX_ACL */

+ 357 - 0
fs/btrfs/async-thread.c

@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+# include <linux/freezer.h>
+#include "async-thread.h"
+
+/*
+ * container for the kthread task pointer and the list of pending work
+ * One of these is allocated per thread.
+ */
+struct btrfs_worker_thread {
+	/* pool we belong to */
+	struct btrfs_workers *workers;
+
+	/* list of struct btrfs_work that are waiting for service */
+	struct list_head pending;
+
+	/* list of worker threads from struct btrfs_workers */
+	struct list_head worker_list;
+
+	/* kthread */
+	struct task_struct *task;
+
+	/* number of things on the pending list */
+	atomic_t num_pending;
+
+	unsigned long sequence;
+
+	/* protects the pending list. */
+	spinlock_t lock;
+
+	/* set to non-zero when this thread is already awake and kicking */
+	int working;
+
+	/* are we currently idle */
+	int idle;
+};
+
+/*
+ * helper function to move a thread onto the idle list after it
+ * has finished some requests.
+ */
+static void check_idle_worker(struct btrfs_worker_thread *worker)
+{
+	if (!worker->idle && atomic_read(&worker->num_pending) <
+	    worker->workers->idle_thresh / 2) {
+		unsigned long flags;
+		spin_lock_irqsave(&worker->workers->lock, flags);
+		worker->idle = 1;
+		list_move(&worker->worker_list, &worker->workers->idle_list);
+		spin_unlock_irqrestore(&worker->workers->lock, flags);
+	}
+}
+
+/*
+ * helper function to move a thread off the idle list after new
+ * pending work is added.
+ */
+static void check_busy_worker(struct btrfs_worker_thread *worker)
+{
+	if (worker->idle && atomic_read(&worker->num_pending) >=
+	    worker->workers->idle_thresh) {
+		unsigned long flags;
+		spin_lock_irqsave(&worker->workers->lock, flags);
+		worker->idle = 0;
+		list_move_tail(&worker->worker_list,
+			       &worker->workers->worker_list);
+		spin_unlock_irqrestore(&worker->workers->lock, flags);
+	}
+}
+
+/*
+ * main loop for servicing work items
+ */
+static int worker_loop(void *arg)
+{
+	struct btrfs_worker_thread *worker = arg;
+	struct list_head *cur;
+	struct btrfs_work *work;
+	do {
+		spin_lock_irq(&worker->lock);
+		while(!list_empty(&worker->pending)) {
+			cur = worker->pending.next;
+			work = list_entry(cur, struct btrfs_work, list);
+			list_del(&work->list);
+			clear_bit(0, &work->flags);
+
+			work->worker = worker;
+			spin_unlock_irq(&worker->lock);
+
+			work->func(work);
+
+			atomic_dec(&worker->num_pending);
+			spin_lock_irq(&worker->lock);
+			check_idle_worker(worker);
+		}
+		worker->working = 0;
+		if (freezing(current)) {
+			refrigerator();
+		} else {
+			set_current_state(TASK_INTERRUPTIBLE);
+			spin_unlock_irq(&worker->lock);
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+/*
+ * this will wait for all the worker threads to shutdown
+ */
+int btrfs_stop_workers(struct btrfs_workers *workers)
+{
+	struct list_head *cur;
+	struct btrfs_worker_thread *worker;
+
+	list_splice_init(&workers->idle_list, &workers->worker_list);
+	while(!list_empty(&workers->worker_list)) {
+		cur = workers->worker_list.next;
+		worker = list_entry(cur, struct btrfs_worker_thread,
+				    worker_list);
+		kthread_stop(worker->task);
+		list_del(&worker->worker_list);
+		kfree(worker);
+	}
+	return 0;
+}
+
+/*
+ * simple init on struct btrfs_workers
+ */
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+{
+	workers->num_workers = 0;
+	INIT_LIST_HEAD(&workers->worker_list);
+	INIT_LIST_HEAD(&workers->idle_list);
+	spin_lock_init(&workers->lock);
+	workers->max_workers = max;
+	workers->idle_thresh = 32;
+	workers->name = name;
+}
+
+/*
+ * starts new worker threads.  This does not enforce the max worker
+ * count in case you need to temporarily go past it.
+ */
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+{
+	struct btrfs_worker_thread *worker;
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < num_workers; i++) {
+		worker = kzalloc(sizeof(*worker), GFP_NOFS);
+		if (!worker) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		INIT_LIST_HEAD(&worker->pending);
+		INIT_LIST_HEAD(&worker->worker_list);
+		spin_lock_init(&worker->lock);
+		atomic_set(&worker->num_pending, 0);
+		worker->task = kthread_run(worker_loop, worker,
+					   "btrfs-%s-%d", workers->name,
+					   workers->num_workers + i);
+		worker->workers = workers;
+		if (IS_ERR(worker->task)) {
+			kfree(worker);
+			ret = PTR_ERR(worker->task);
+			goto fail;
+		}
+
+		spin_lock_irq(&workers->lock);
+		list_add_tail(&worker->worker_list, &workers->idle_list);
+		worker->idle = 1;
+		workers->num_workers++;
+		spin_unlock_irq(&workers->lock);
+	}
+	return 0;
+fail:
+	btrfs_stop_workers(workers);
+	return ret;
+}
+
+/*
+ * run through the list and find a worker thread that doesn't have a lot
+ * to do right now.  This can return null if we aren't yet at the thread
+ * count limit and all of the threads are busy.
+ */
+static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
+{
+	struct btrfs_worker_thread *worker;
+	struct list_head *next;
+	int enforce_min = workers->num_workers < workers->max_workers;
+
+	/*
+	 * if we find an idle thread, don't move it to the end of the
+	 * idle list.  This improves the chance that the next submission
+	 * will reuse the same thread, and maybe catch it while it is still
+	 * working
+	 */
+	if (!list_empty(&workers->idle_list)) {
+		next = workers->idle_list.next;
+		worker = list_entry(next, struct btrfs_worker_thread,
+				    worker_list);
+		return worker;
+	}
+	if (enforce_min || list_empty(&workers->worker_list))
+		return NULL;
+
+	/*
+	 * if we pick a busy task, move the task to the end of the list.
+	 * hopefully this will keep things somewhat evenly balanced.
+	 * Do the move in batches based on the sequence number.  This groups
+	 * requests submitted at roughly the same time onto the same worker.
+	 */
+	next = workers->worker_list.next;
+	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
+	atomic_inc(&worker->num_pending);
+	worker->sequence++;
+
+	if (worker->sequence % workers->idle_thresh == 0)
+		list_move_tail(next, &workers->worker_list);
+	return worker;
+}
+
+/*
+ * selects a worker thread to take the next job.  This will either find
+ * an idle worker, start a new worker up to the max count, or just return
+ * one of the existing busy workers.
+ */
+static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+{
+	struct btrfs_worker_thread *worker;
+	unsigned long flags;
+
+again:
+	spin_lock_irqsave(&workers->lock, flags);
+	worker = next_worker(workers);
+	spin_unlock_irqrestore(&workers->lock, flags);
+
+	if (!worker) {
+		spin_lock_irqsave(&workers->lock, flags);
+		if (workers->num_workers >= workers->max_workers) {
+			struct list_head *fallback = NULL;
+			/*
+			 * we have failed to find any workers, just
+			 * return the force one
+			 */
+			if (!list_empty(&workers->worker_list))
+				fallback = workers->worker_list.next;
+			if (!list_empty(&workers->idle_list))
+				fallback = workers->idle_list.next;
+			BUG_ON(!fallback);
+			worker = list_entry(fallback,
+				  struct btrfs_worker_thread, worker_list);
+			spin_unlock_irqrestore(&workers->lock, flags);
+		} else {
+			spin_unlock_irqrestore(&workers->lock, flags);
+			/* we're below the limit, start another worker */
+			btrfs_start_workers(workers, 1);
+			goto again;
+		}
+	}
+	return worker;
+}
+
+/*
+ * btrfs_requeue_work just puts the work item back on the tail of the list
+ * it was taken from.  It is intended for use with long running work functions
+ * that make some progress and want to give the cpu up for others.
+ */
+int btrfs_requeue_work(struct btrfs_work *work)
+{
+	struct btrfs_worker_thread *worker = work->worker;
+	unsigned long flags;
+
+	if (test_and_set_bit(0, &work->flags))
+		goto out;
+
+	spin_lock_irqsave(&worker->lock, flags);
+	atomic_inc(&worker->num_pending);
+	list_add_tail(&work->list, &worker->pending);
+
+	/* by definition we're busy, take ourselves off the idle
+	 * list
+	 */
+	if (worker->idle) {
+		spin_lock_irqsave(&worker->workers->lock, flags);
+		worker->idle = 0;
+		list_move_tail(&worker->worker_list,
+			       &worker->workers->worker_list);
+		spin_unlock_irqrestore(&worker->workers->lock, flags);
+	}
+
+	spin_unlock_irqrestore(&worker->lock, flags);
+
+out:
+	return 0;
+}
+
+/*
+ * places a struct btrfs_work into the pending queue of one of the kthreads
+ */
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+{
+	struct btrfs_worker_thread *worker;
+	unsigned long flags;
+	int wake = 0;
+
+	/* don't requeue something already on a list */
+	if (test_and_set_bit(0, &work->flags))
+		goto out;
+
+	worker = find_worker(workers);
+
+	spin_lock_irqsave(&worker->lock, flags);
+	atomic_inc(&worker->num_pending);
+	check_busy_worker(worker);
+	list_add_tail(&work->list, &worker->pending);
+
+	/*
+	 * avoid calling into wake_up_process if this thread has already
+	 * been kicked
+	 */
+	if (!worker->working)
+		wake = 1;
+	worker->working = 1;
+
+	spin_unlock_irqrestore(&worker->lock, flags);
+
+	if (wake)
+		wake_up_process(worker->task);
+out:
+	return 0;
+}

+ 85 - 0
fs/btrfs/async-thread.h

@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_ASYNC_THREAD_
+#define __BTRFS_ASYNC_THREAD_
+
+struct btrfs_worker_thread;
+
+/*
+ * This is similar to a workqueue, but it is meant to spread the operations
+ * across all available cpus instead of just the CPU that was used to
+ * queue the work.  There is also some batching introduced to try and
+ * cut down on context switches.
+ *
+ * By default threads are added on demand up to 2 * the number of cpus.
+ * Changing struct btrfs_workers->max_workers is one way to prevent
+ * demand creation of kthreads.
+ *
+ * the basic model of these worker threads is to embed a btrfs_work
+ * structure in your own data struct, and use container_of in a
+ * work function to get back to your data struct.
+ */
+struct btrfs_work {
+	/*
+	 * only func should be set to the function you want called
+	 * your work struct is passed as the only arg
+	 */
+	void (*func)(struct btrfs_work *work);
+
+	/*
+	 * flags should be set to zero.  It is used to make sure the
+	 * struct is only inserted once into the list.
+	 */
+	unsigned long flags;
+
+	/* don't touch these */
+	struct btrfs_worker_thread *worker;
+	struct list_head list;
+};
+
+struct btrfs_workers {
+	/* current number of running workers */
+	int num_workers;
+
+	/* max number of workers allowed.  changed by btrfs_start_workers */
+	int max_workers;
+
+	/* once a worker has this many requests or fewer, it is idle */
+	int idle_thresh;
+
+	/* list with all the work threads.  The workers on the idle thread
+	 * may be actively servicing jobs, but they haven't yet hit the
+	 * idle thresh limit above.
+	 */
+	struct list_head worker_list;
+	struct list_head idle_list;
+
+	/* lock for finding the next worker thread to queue on */
+	spinlock_t lock;
+
+	/* extra name for this worker, used for current->name */
+	char *name;
+};
+
+int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
+int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
+int btrfs_requeue_work(struct btrfs_work *work);
+#endif
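
The header comment above describes the intended usage model: embed a struct btrfs_work inside your own structure, point .func at your handler, and recover the outer structure with container_of() in that handler. Purely as an illustrative sketch (not part of this commit; my_job, my_job_func and my_submit are made-up names, and the struct btrfs_workers pool is assumed to have already been set up with btrfs_init_workers()/btrfs_start_workers()):

	struct my_job {
		struct btrfs_work work;	/* embedded, not a pointer */
		u64 start;
		u64 len;
	};

	static void my_job_func(struct btrfs_work *work)
	{
		/* recover the containing structure from the embedded work item */
		struct my_job *job = container_of(work, struct my_job, work);

		/* ... process job->start / job->len here ... */
		kfree(job);
	}

	static int my_submit(struct btrfs_workers *workers, u64 start, u64 len)
	{
		struct my_job *job = kzalloc(sizeof(*job), GFP_NOFS);

		if (!job)
			return -ENOMEM;
		job->start = start;
		job->len = len;
		job->work.func = my_job_func;	/* flags already zeroed by kzalloc */
		return btrfs_queue_worker(workers, &job->work);
	}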

+ 133 - 0
fs/btrfs/btrfs_inode.h

@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_I__
+#define __BTRFS_I__
+
+#include "extent_map.h"
+#include "extent_io.h"
+#include "ordered-data.h"
+
+/* in memory btrfs inode */
+struct btrfs_inode {
+	/* which subvolume this inode belongs to */
+	struct btrfs_root *root;
+
+	/* the block group preferred for allocations.  This pointer is buggy
+	 * and needs to be replaced with a bytenr instead
+	 */
+	struct btrfs_block_group_cache *block_group;
+
+	/* key used to find this inode on disk.  This is used by the code
+	 * to read in roots of subvolumes
+	 */
+	struct btrfs_key location;
+
+	/* the extent_tree has caches of all the extent mappings to disk */
+	struct extent_map_tree extent_tree;
+
+	/* the io_tree does range state (DIRTY, LOCKED etc) */
+	struct extent_io_tree io_tree;
+
+	/* special utility tree used to record which mirrors have already been
+	 * tried when checksums fail for a given block
+	 */
+	struct extent_io_tree io_failure_tree;
+
+	/* held while inserting checksums to avoid races */
+	struct mutex csum_mutex;
+
+	/* held while inserting or deleting extents from files */
+	struct mutex extent_mutex;
+
+	/* held while logging the inode in tree-log.c */
+	struct mutex log_mutex;
+
+	/* used to order data wrt metadata */
+	struct btrfs_ordered_inode_tree ordered_tree;
+
+	/* standard acl pointers */
+	struct posix_acl *i_acl;
+	struct posix_acl *i_default_acl;
+
+	/* for keeping track of orphaned inodes */
+	struct list_head i_orphan;
+
+	/* list of all the delalloc inodes in the FS.  There are times we need
+	 * to write all the delalloc pages to disk, and this list is used
+	 * to walk them all.
+	 */
+	struct list_head delalloc_inodes;
+
+	/* full 64 bit generation number, struct vfs_inode doesn't have a big
+	 * enough field for this.
+	 */
+	u64 generation;
+
+	/*
+	 * transid of the trans_handle that last modified this inode
+	 */
+	u64 last_trans;
+	/*
+	 * transid that last logged this inode
+	 */
+	u64 logged_trans;
+
+	/*
+	 * trans that last made a change that should be fully fsync'd.  This
+	 * gets reset to zero each time the inode is logged
+	 */
+	u64 log_dirty_trans;
+
+	/* total number of bytes pending delalloc, used by stat to calc the
+	 * real block usage of the file
+	 */
+	u64 delalloc_bytes;
+
+	/*
+	 * the size of the file stored in the metadata on disk.  data=ordered
+	 * means the in-memory i_size might be larger than the size on disk
+	 * because not all the blocks are written yet.
+	 */
+	u64 disk_i_size;
+
+	/* flags field from the on disk inode */
+	u32 flags;
+
+	/*
+	 * if this is a directory then index_cnt is the counter for the index
+	 * number for new files that are created
+	 */
+	u64 index_cnt;
+
+	struct inode vfs_inode;
+};
+
+static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
+{
+	return container_of(inode, struct btrfs_inode, vfs_inode);
+}
+
+static inline void btrfs_i_size_write(struct inode *inode, u64 size)
+{
+	inode->i_size = size;
+	BTRFS_I(inode)->disk_i_size = size;
+}
+
+
+#endif
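
As a hedged illustration (not in the commit; example_disk_size is a hypothetical helper): VFS callbacks receive a plain struct inode *, and the BTRFS_I() accessor above uses container_of() to reach the surrounding struct btrfs_inode:

	static u64 example_disk_size(struct inode *inode)
	{
		/* disk_i_size may lag i_size until ordered data reaches disk */
		return BTRFS_I(inode)->disk_i_size;
	}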

+ 24 - 0
fs/btrfs/compat.h

@@ -0,0 +1,24 @@
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+#define btrfs_drop_nlink(inode) drop_nlink(inode)
+#define btrfs_inc_nlink(inode)	inc_nlink(inode)
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+	struct dentry *d;
+
+	if (!inode)
+		return NULL;
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	d = d_alloc_anon(inode);
+	if (!d)
+		iput(inode);
+	return d;
+}
+#endif
+
+#endif /* _COMPAT_H_ */

+ 120 - 0
fs/btrfs/crc32c.h

@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_CRC32C__
+#define __BTRFS_CRC32C__
+#include <asm/byteorder.h>
+#include <linux/crc32c.h>
+#include <linux/version.h>
+
+/* #define CONFIG_BTRFS_HW_SUM 1 */
+
+#ifdef CONFIG_BTRFS_HW_SUM
+#ifdef CONFIG_X86
+/*
+ * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
+ * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#define X86_FEATURE_XMM4_2     (4*32+20) /* Streaming SIMD Extensions-4.2 */
+#define cpu_has_xmm4_2         boot_cpu_has(X86_FEATURE_XMM4_2)
+
+#ifdef CONFIG_X86_64
+#define REX_PRE	"0x48, "
+#define SCALE_F	8
+#else
+#define REX_PRE
+#define SCALE_F	4
+#endif
+
+static inline u32 btrfs_crc32c_le_hw_byte(u32 crc, unsigned char const *data,
+				   size_t length)
+{
+	while (length--) {
+		__asm__ __volatile__(
+			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+			:"=S"(crc)
+			:"0"(crc), "c"(*data)
+		);
+		data++;
+	}
+
+	return crc;
+}
+
+static inline u32 __pure btrfs_crc32c_le_hw(u32 crc, unsigned char const *p,
+				     size_t len)
+{
+	unsigned int iquotient = len / SCALE_F;
+	unsigned int iremainder = len % SCALE_F;
+#ifdef CONFIG_X86_64
+	u64 *ptmp = (u64 *)p;
+#else
+	u32 *ptmp = (u32 *)p;
+#endif
+
+	while (iquotient--) {
+		__asm__ __volatile__(
+			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+			:"=S"(crc)
+			:"0"(crc), "c"(*ptmp)
+		);
+		ptmp++;
+	}
+
+	if (iremainder)
+		crc = btrfs_crc32c_le_hw_byte(crc, (unsigned char *)ptmp,
+					      iremainder);
+
+	return crc;
+}
+#endif /* CONFIG_BTRFS_HW_SUM */
+
+static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address,
+				 size_t len)
+{
+#ifdef CONFIG_BTRFS_HW_SUM
+	if (cpu_has_xmm4_2)
+		return btrfs_crc32c_le_hw(crc, address, len);
+#endif
+	return crc32c_le(crc, address, len);
+}
+
+#else
+
+#define __btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
+
+#endif /* CONFIG_X86 */
+
+/**
+ * implementation of crc32c_le() changed in linux-2.6.23,
+ * as of v0.13 btrfs-progs is using the latest version.
+ * We must work around older implementations of crc32c_le()
+ * found on older kernel versions.
+ */
+#define btrfs_crc32c(seed, data, length) \
+	__btrfs_crc32c(seed, (unsigned char const *)data, length)
+#endif
+
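
A rough usage sketch (not part of this commit; example_csum and the ~0 seed are arbitrary choices for illustration): callers go through the btrfs_crc32c() wrapper rather than crc32c_le() directly, so the SSE4.2 path is picked up automatically when CONFIG_BTRFS_HW_SUM is defined (it is commented out above by default):

	static u32 example_csum(const void *data, size_t len)
	{
		/* placeholder seed; a real caller would thread its running crc through */
		return btrfs_crc32c(~(u32)0, data, len);
	}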

+ 3716 - 0
fs/btrfs/ctree.c

@@ -0,0 +1,3716 @@
+/*
+ * Copyright (C) 2007,2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "locking.h"
+
+static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, int level);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *ins_key,
+		      struct btrfs_path *path, int data_size, int extend);
+static int push_node_left(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct extent_buffer *dst,
+			  struct extent_buffer *src, int empty);
+static int balance_node_right(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct extent_buffer *dst_buf,
+			      struct extent_buffer *src_buf);
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_path *path, int level, int slot);
+
+inline void btrfs_init_path(struct btrfs_path *p)
+{
+	memset(p, 0, sizeof(*p));
+}
+
+struct btrfs_path *btrfs_alloc_path(void)
+{
+	struct btrfs_path *path;
+	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
+	if (path) {
+		btrfs_init_path(path);
+		path->reada = 1;
+	}
+	return path;
+}
+
+/* this also releases the path */
+void btrfs_free_path(struct btrfs_path *p)
+{
+	btrfs_release_path(NULL, p);
+	kmem_cache_free(btrfs_path_cachep, p);
+}
+
+/*
+ * path release drops references on the extent buffers in the path
+ * and it drops any locks held by this path
+ *
+ * It is safe to call this on paths that no locks or extent buffers held.
+ */
+void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+		p->slots[i] = 0;
+		if (!p->nodes[i])
+			continue;
+		if (p->locks[i]) {
+			btrfs_tree_unlock(p->nodes[i]);
+			p->locks[i] = 0;
+		}
+		free_extent_buffer(p->nodes[i]);
+		p->nodes[i] = NULL;
+	}
+}
+
+/*
+ * safely gets a reference on the root node of a tree.  A lock
+ * is not taken, so a concurrent writer may put a different node
+ * at the root of the tree.  See btrfs_lock_root_node for the
+ * looping required.
+ *
+ * The extent buffer returned by this has a reference taken, so
+ * it won't disappear.  It may stop being the root of the tree
+ * at any time because there are no locks held.
+ */
+struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+	spin_lock(&root->node_lock);
+	eb = root->node;
+	extent_buffer_get(eb);
+	spin_unlock(&root->node_lock);
+	return eb;
+}
+
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while(1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_lock(eb);
+
+		spin_lock(&root->node_lock);
+		if (eb == root->node) {
+			spin_unlock(&root->node_lock);
+			break;
+		}
+		spin_unlock(&root->node_lock);
+
+		btrfs_tree_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
+/* cowonly roots (everything that is not a reference counted cow subvolume)
+ * just get put onto a simple dirty list.  transaction.c walks this to make
+ * sure they get properly updated on disk.
+ */
+static void add_root_to_dirty_list(struct btrfs_root *root)
+{
+	if (root->track_dirty && list_empty(&root->dirty_list)) {
+		list_add(&root->dirty_list,
+			 &root->fs_info->dirty_cowonly_roots);
+	}
+}
+
+/*
+ * used by snapshot creation to make a copy of a root for a tree with
+ * a given objectid.  The buffer with the new root node is returned in
+ * cow_ret, and this func returns zero on success or a negative error code.
+ */
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root,
+		      struct extent_buffer *buf,
+		      struct extent_buffer **cow_ret, u64 new_root_objectid)
+{
+	struct extent_buffer *cow;
+	u32 nritems;
+	int ret = 0;
+	int level;
+	struct btrfs_root *new_root;
+
+	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
+	if (!new_root)
+		return -ENOMEM;
+
+	memcpy(new_root, root, sizeof(*new_root));
+	new_root->root_key.objectid = new_root_objectid;
+
+	WARN_ON(root->ref_cows && trans->transid !=
+		root->fs_info->running_transaction->transid);
+	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+	level = btrfs_header_level(buf);
+	nritems = btrfs_header_nritems(buf);
+
+	cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
+				     new_root_objectid, trans->transid,
+				     level, buf->start, 0);
+	if (IS_ERR(cow)) {
+		kfree(new_root);
+		return PTR_ERR(cow);
+	}
+
+	copy_extent_buffer(cow, buf, 0, 0, cow->len);
+	btrfs_set_header_bytenr(cow, cow->start);
+	btrfs_set_header_generation(cow, trans->transid);
+	btrfs_set_header_owner(cow, new_root_objectid);
+	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+
+	WARN_ON(btrfs_header_generation(buf) > trans->transid);
+	ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
+	kfree(new_root);
+
+	if (ret)
+		return ret;
+
+	btrfs_mark_buffer_dirty(cow);
+	*cow_ret = cow;
+	return 0;
+}
+
+/*
+ * does the dirty work in cow of a single block.  The parent block
+ * (if supplied) is updated to point to the new cow copy.  The new
+ * buffer is marked dirty and returned locked.  If you modify the block
+ * it needs to be marked dirty again.
+ *
+ * search_start -- an allocation hint for the new block
+ *
+ * empty_size -- a hint that you plan on doing more cow.  This is the size
+ * in bytes the allocator should try to find free next to the block it
+ * returns.  This is just a hint and may be ignored by the allocator.
+ *
+ * prealloc_dest -- if you have already reserved a destination for the cow,
+ * this uses that block instead of allocating a new one.
+ * btrfs_alloc_reserved_extent is used to finish the allocation.
+ */
+int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct extent_buffer *buf,
+			     struct extent_buffer *parent, int parent_slot,
+			     struct extent_buffer **cow_ret,
+			     u64 search_start, u64 empty_size,
+			     u64 prealloc_dest)
+{
+	u64 parent_start;
+	struct extent_buffer *cow;
+	u32 nritems;
+	int ret = 0;
+	int level;
+	int unlock_orig = 0;
+
+	if (*cow_ret == buf)
+		unlock_orig = 1;
+
+	WARN_ON(!btrfs_tree_locked(buf));
+
+	if (parent)
+		parent_start = parent->start;
+	else
+		parent_start = 0;
+
+	WARN_ON(root->ref_cows && trans->transid !=
+		root->fs_info->running_transaction->transid);
+	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+
+	level = btrfs_header_level(buf);
+	nritems = btrfs_header_nritems(buf);
+
+	if (prealloc_dest) {
+		struct btrfs_key ins;
+
+		ins.objectid = prealloc_dest;
+		ins.offset = buf->len;
+		ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+		ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
+						  root->root_key.objectid,
+						  trans->transid, level, &ins);
+		BUG_ON(ret);
+		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
+					    buf->len);
+	} else {
+		cow = btrfs_alloc_free_block(trans, root, buf->len,
+					     parent_start,
+					     root->root_key.objectid,
+					     trans->transid, level,
+					     search_start, empty_size);
+	}
+	if (IS_ERR(cow))
+		return PTR_ERR(cow);
+
+	copy_extent_buffer(cow, buf, 0, 0, cow->len);
+	btrfs_set_header_bytenr(cow, cow->start);
+	btrfs_set_header_generation(cow, trans->transid);
+	btrfs_set_header_owner(cow, root->root_key.objectid);
+	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+
+	WARN_ON(btrfs_header_generation(buf) > trans->transid);
+	if (btrfs_header_generation(buf) != trans->transid) {
+		u32 nr_extents;
+		ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
+		if (ret)
+			return ret;
+
+		ret = btrfs_cache_ref(trans, root, buf, nr_extents);
+		WARN_ON(ret);
+	} else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
+		/*
+		 * There are only two places that can drop reference to
+		 * tree blocks owned by living reloc trees, one is here,
+		 * the other place is btrfs_merge_path. In both places,
+		 * we check reference count while tree block is locked.
+		 * Furthermore, if reference count is one, it won't get
+		 * increased by someone else.
+		 */
+		u32 refs;
+		ret = btrfs_lookup_extent_ref(trans, root, buf->start,
+					      buf->len, &refs);
+		BUG_ON(ret);
+		if (refs == 1) {
+			ret = btrfs_update_ref(trans, root, buf, cow,
+					       0, nritems);
+			clean_tree_block(trans, root, buf);
+		} else {
+			ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
+		}
+		BUG_ON(ret);
+	} else {
+		ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
+		if (ret)
+			return ret;
+		clean_tree_block(trans, root, buf);
+	}
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		ret = btrfs_add_reloc_mapping(root, buf->start,
+					      buf->len, cow->start);
+		BUG_ON(ret);
+		ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
+		WARN_ON(ret);
+	}
+
+	if (buf == root->node) {
+		WARN_ON(parent && parent != buf);
+
+		spin_lock(&root->node_lock);
+		root->node = cow;
+		extent_buffer_get(cow);
+		spin_unlock(&root->node_lock);
+
+		if (buf != root->commit_root) {
+			btrfs_free_extent(trans, root, buf->start,
+					  buf->len, buf->start,
+					  root->root_key.objectid,
+					  btrfs_header_generation(buf),
+					  level, 1);
+		}
+		free_extent_buffer(buf);
+		add_root_to_dirty_list(root);
+	} else {
+		btrfs_set_node_blockptr(parent, parent_slot,
+					cow->start);
+		WARN_ON(trans->transid == 0);
+		btrfs_set_node_ptr_generation(parent, parent_slot,
+					      trans->transid);
+		btrfs_mark_buffer_dirty(parent);
+		WARN_ON(btrfs_header_generation(parent) != trans->transid);
+		btrfs_free_extent(trans, root, buf->start, buf->len,
+				  parent_start, btrfs_header_owner(parent),
+				  btrfs_header_generation(parent), level, 1);
+	}
+	if (unlock_orig)
+		btrfs_tree_unlock(buf);
+	free_extent_buffer(buf);
+	btrfs_mark_buffer_dirty(cow);
+	*cow_ret = cow;
+	return 0;
+}
+
+/*
+ * cows a single block, see __btrfs_cow_block for the real work.
+ * This version of it has extra checks so that a block isn't cow'd more than
+ * once per transaction, as long as it hasn't been written yet
+ */
+int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct extent_buffer *buf,
+		    struct extent_buffer *parent, int parent_slot,
+		    struct extent_buffer **cow_ret, u64 prealloc_dest)
+{
+	u64 search_start;
+	int ret;
+
+	if (trans->transaction != root->fs_info->running_transaction) {
+		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+		       root->fs_info->running_transaction->transid);
+		WARN_ON(1);
+	}
+	if (trans->transid != root->fs_info->generation) {
+		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+		       root->fs_info->generation);
+		WARN_ON(1);
+	}
+
+	spin_lock(&root->fs_info->hash_lock);
+	if (btrfs_header_generation(buf) == trans->transid &&
+	    btrfs_header_owner(buf) == root->root_key.objectid &&
+	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+		*cow_ret = buf;
+		spin_unlock(&root->fs_info->hash_lock);
+		WARN_ON(prealloc_dest);
+		return 0;
+	}
+	spin_unlock(&root->fs_info->hash_lock);
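+	/*
+	 * use the start of the 1GB chunk that holds this block as the
+	 * allocation hint, so the cow'd copy tends to land near the
+	 * original on disk
+	 */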
+	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
+	ret = __btrfs_cow_block(trans, root, buf, parent,
+				 parent_slot, cow_ret, search_start, 0,
+				 prealloc_dest);
+	return ret;
+}
+
+/*
+ * helper function for defrag to decide if two blocks pointed to by a
+ * node are actually close by
+ */
+static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
+{
+	if (blocknr < other && other - (blocknr + blocksize) < 32768)
+		return 1;
+	if (blocknr > other && blocknr - (other + blocksize) < 32768)
+		return 1;
+	return 0;
+}
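+
+/*
+ * e.g. with a 4K blocksize, blocks at byte offsets 0 and 20000 count as
+ * close (gap of 15904 bytes < 32768), while 0 and 200000 do not.
+ */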
+
+/*
+ * compare two keys in a memcmp fashion
+ */
+static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+{
+	struct btrfs_key k1;
+
+	btrfs_disk_key_to_cpu(&k1, disk);
+
+	if (k1.objectid > k2->objectid)
+		return 1;
+	if (k1.objectid < k2->objectid)
+		return -1;
+	if (k1.type > k2->type)
+		return 1;
+	if (k1.type < k2->type)
+		return -1;
+	if (k1.offset > k2->offset)
+		return 1;
+	if (k1.offset < k2->offset)
+		return -1;
+	return 0;
+}
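+
+/*
+ * keys compare by objectid, then type, then offset, which matches the
+ * order items are laid out in the leaves.
+ */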
+
+
+/*
+ * this is used by the defrag code to go through all the
+ * leaves pointed to by a node and reallocate them so that
+ * disk order is close to key order
+ */
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct extent_buffer *parent,
+		       int start_slot, int cache_only, u64 *last_ret,
+		       struct btrfs_key *progress)
+{
+	struct extent_buffer *cur;
+	u64 blocknr;
+	u64 gen;
+	u64 search_start = *last_ret;
+	u64 last_block = 0;
+	u64 other;
+	u32 parent_nritems;
+	int end_slot;
+	int i;
+	int err = 0;
+	int parent_level;
+	int uptodate;
+	u32 blocksize;
+	int progress_passed = 0;
+	struct btrfs_disk_key disk_key;
+
+	parent_level = btrfs_header_level(parent);
+	if (cache_only && parent_level != 1)
+		return 0;
+
+	if (trans->transaction != root->fs_info->running_transaction) {
+		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+		       root->fs_info->running_transaction->transid);
+		WARN_ON(1);
+	}
+	if (trans->transid != root->fs_info->generation) {
+		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
+		       root->fs_info->generation);
+		WARN_ON(1);
+	}
+
+	parent_nritems = btrfs_header_nritems(parent);
+	blocksize = btrfs_level_size(root, parent_level - 1);
+	end_slot = parent_nritems;
+
+	if (parent_nritems == 1)
+		return 0;
+
+	for (i = start_slot; i < end_slot; i++) {
+		int close = 1;
+
+		if (!parent->map_token) {
+			map_extent_buffer(parent,
+					btrfs_node_key_ptr_offset(i),
+					sizeof(struct btrfs_key_ptr),
+					&parent->map_token, &parent->kaddr,
+					&parent->map_start, &parent->map_len,
+					KM_USER1);
+		}
+		btrfs_node_key(parent, &disk_key, i);
+		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
+			continue;
+
+		progress_passed = 1;
+		blocknr = btrfs_node_blockptr(parent, i);
+		gen = btrfs_node_ptr_generation(parent, i);
+		if (last_block == 0)
+			last_block = blocknr;
+
+		if (i > 0) {
+			other = btrfs_node_blockptr(parent, i - 1);
+			close = close_blocks(blocknr, other, blocksize);
+		}
+		if (!close && i < end_slot - 2) {
+			other = btrfs_node_blockptr(parent, i + 1);
+			close = close_blocks(blocknr, other, blocksize);
+		}
+		if (close) {
+			last_block = blocknr;
+			continue;
+		}
+		if (parent->map_token) {
+			unmap_extent_buffer(parent, parent->map_token,
+					    KM_USER1);
+			parent->map_token = NULL;
+		}
+
+		cur = btrfs_find_tree_block(root, blocknr, blocksize);
+		if (cur)
+			uptodate = btrfs_buffer_uptodate(cur, gen);
+		else
+			uptodate = 0;
+		if (!cur || !uptodate) {
+			if (cache_only) {
+				free_extent_buffer(cur);
+				continue;
+			}
+			if (!cur) {
+				cur = read_tree_block(root, blocknr,
+							 blocksize, gen);
+			} else if (!uptodate) {
+				btrfs_read_buffer(cur, gen);
+			}
+		}
+		if (search_start == 0)
+			search_start = last_block;
+
+		btrfs_tree_lock(cur);
+		err = __btrfs_cow_block(trans, root, cur, parent, i,
+					&cur, search_start,
+					min(16 * blocksize,
+					    (end_slot - i) * blocksize), 0);
+		if (err) {
+			btrfs_tree_unlock(cur);
+			free_extent_buffer(cur);
+			break;
+		}
+		search_start = cur->start;
+		last_block = cur->start;
+		*last_ret = search_start;
+		btrfs_tree_unlock(cur);
+		free_extent_buffer(cur);
+	}
+	if (parent->map_token) {
+		unmap_extent_buffer(parent, parent->map_token,
+				    KM_USER1);
+		parent->map_token = NULL;
+	}
+	return err;
+}
+
+/*
+ * The leaf data grows from end-to-front in the node.
+ * this returns the address of the start of the last item,
+ * which is where the leaf data stack stops
+ */
+static inline unsigned int leaf_data_end(struct btrfs_root *root,
+					 struct extent_buffer *leaf)
+{
+	u32 nr = btrfs_header_nritems(leaf);
+	if (nr == 0)
+		return BTRFS_LEAF_DATA_SIZE(root);
+	return btrfs_item_offset_nr(leaf, nr - 1);
+}
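+
+/*
+ * illustrative leaf layout: item headers grow forward from the block
+ * header while item data grows backward from the end of the block:
+ *
+ *	[header][item 0][item 1] ... free space ... [data 1][data 0]
+ *
+ * so the data for the last item marks the current "data end".
+ */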
+
+/*
+ * extra debugging checks to make sure all the items in a node are
+ * well formed and in the proper order
+ */
+static int check_node(struct btrfs_root *root, struct btrfs_path *path,
+		      int level)
+{
+	struct extent_buffer *parent = NULL;
+	struct extent_buffer *node = path->nodes[level];
+	struct btrfs_disk_key parent_key;
+	struct btrfs_disk_key node_key;
+	int parent_slot;
+	int slot;
+	struct btrfs_key cpukey;
+	u32 nritems = btrfs_header_nritems(node);
+
+	if (path->nodes[level + 1])
+		parent = path->nodes[level + 1];
+
+	slot = path->slots[level];
+	BUG_ON(nritems == 0);
+	if (parent) {
+		parent_slot = path->slots[level + 1];
+		btrfs_node_key(parent, &parent_key, parent_slot);
+		btrfs_node_key(node, &node_key, 0);
+		BUG_ON(memcmp(&parent_key, &node_key,
+			      sizeof(struct btrfs_disk_key)));
+		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
+		       btrfs_header_bytenr(node));
+	}
+	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
+	if (slot != 0) {
+		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
+		btrfs_node_key(node, &node_key, slot);
+		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
+	}
+	if (slot < nritems - 1) {
+		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
+		btrfs_node_key(node, &node_key, slot);
+		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
+	}
+	return 0;
+}
+
+/*
+ * extra checking to make sure all the items in a leaf are
+ * well formed and in the proper order
+ */
+static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
+		      int level)
+{
+	struct extent_buffer *leaf = path->nodes[level];
+	struct extent_buffer *parent = NULL;
+	int parent_slot;
+	struct btrfs_key cpukey;
+	struct btrfs_disk_key parent_key;
+	struct btrfs_disk_key leaf_key;
+	int slot = path->slots[0];
+
+	u32 nritems = btrfs_header_nritems(leaf);
+
+	if (path->nodes[level + 1])
+		parent = path->nodes[level + 1];
+
+	if (nritems == 0)
+		return 0;
+
+	if (parent) {
+		parent_slot = path->slots[level + 1];
+		btrfs_node_key(parent, &parent_key, parent_slot);
+		btrfs_item_key(leaf, &leaf_key, 0);
+
+		BUG_ON(memcmp(&parent_key, &leaf_key,
+		       sizeof(struct btrfs_disk_key)));
+		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
+		       btrfs_header_bytenr(leaf));
+	}
+#if 0
+	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
+		btrfs_item_key(leaf, &leaf_key, i);
+		if (comp_keys(&leaf_key, &cpukey) >= 0) {
+			btrfs_print_leaf(root, leaf);
+			printk("slot %d offset bad key\n", i);
+			BUG_ON(1);
+		}
+		if (btrfs_item_offset_nr(leaf, i) !=
+			btrfs_item_end_nr(leaf, i + 1)) {
+			btrfs_print_leaf(root, leaf);
+			printk("slot %d offset bad\n", i);
+			BUG_ON(1);
+		}
+		if (i == 0) {
+			if (btrfs_item_offset_nr(leaf, i) +
+			       btrfs_item_size_nr(leaf, i) !=
+			       BTRFS_LEAF_DATA_SIZE(root)) {
+				btrfs_print_leaf(root, leaf);
+				printk("slot %d first offset bad\n", i);
+				BUG_ON(1);
+			}
+		}
+	}
+	if (nritems > 0) {
+		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
+				btrfs_print_leaf(root, leaf);
+				printk("slot %d bad size \n", nritems - 1);
+				BUG_ON(1);
+		}
+	}
+#endif
+	if (slot != 0 && slot < nritems - 1) {
+		btrfs_item_key(leaf, &leaf_key, slot);
+		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
+		if (comp_keys(&leaf_key, &cpukey) <= 0) {
+			btrfs_print_leaf(root, leaf);
+			printk("slot %d offset bad key\n", slot);
+			BUG_ON(1);
+		}
+		if (btrfs_item_offset_nr(leaf, slot - 1) !=
+		       btrfs_item_end_nr(leaf, slot)) {
+			btrfs_print_leaf(root, leaf);
+			printk("slot %d offset bad\n", slot);
+			BUG_ON(1);
+		}
+	}
+	if (slot < nritems - 1) {
+		btrfs_item_key(leaf, &leaf_key, slot);
+		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
+		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
+		if (btrfs_item_offset_nr(leaf, slot) !=
+			btrfs_item_end_nr(leaf, slot + 1)) {
+			btrfs_print_leaf(root, leaf);
+			printk("slot %d offset bad\n", slot);
+			BUG_ON(1);
+		}
+	}
+	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
+	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
+	return 0;
+}
+
+static int noinline check_block(struct btrfs_root *root,
+				struct btrfs_path *path, int level)
+{
+	u64 found_start;
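+	/* the debug checks below are currently disabled by this early return */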
+	return 0;
+	if (btrfs_header_level(path->nodes[level]) != level)
+	    printk("warning: bad level %Lu wanted %d found %d\n",
+		   path->nodes[level]->start, level,
+		   btrfs_header_level(path->nodes[level]));
+	found_start = btrfs_header_bytenr(path->nodes[level]);
+	if (found_start != path->nodes[level]->start) {
+	    printk("warning: bad bytenr %Lu found %Lu\n",
+		   path->nodes[level]->start, found_start);
+	}
+#if 0
+	struct extent_buffer *buf = path->nodes[level];
+
+	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
+				 (unsigned long)btrfs_header_fsid(buf),
+				 BTRFS_FSID_SIZE)) {
+		printk("warning bad block %Lu\n", buf->start);
+		return 1;
+	}
+#endif
+	if (level == 0)
+		return check_leaf(root, path, level);
+	return check_node(root, path, level);
+}
+
+/*
+ * search for key in the extent_buffer.  The items start at offset p,
+ * and they are item_size apart.  There are 'max' items in p.
+ *
+ * the slot in the array is returned via slot, and it points to
+ * the place where you would insert key if it is not found in
+ * the array.
+ *
+ * slot may point to max if the key is bigger than all of the keys
+ */
+static noinline int generic_bin_search(struct extent_buffer *eb,
+				       unsigned long p,
+				       int item_size, struct btrfs_key *key,
+				       int max, int *slot)
+{
+	int low = 0;
+	int high = max;
+	int mid;
+	int ret;
+	struct btrfs_disk_key *tmp = NULL;
+	struct btrfs_disk_key unaligned;
+	unsigned long offset;
+	char *map_token = NULL;
+	char *kaddr = NULL;
+	unsigned long map_start = 0;
+	unsigned long map_len = 0;
+	int err;
+
+	while(low < high) {
+		mid = (low + high) / 2;
+		offset = p + mid * item_size;
+
+		if (!map_token || offset < map_start ||
+		    (offset + sizeof(struct btrfs_disk_key)) >
+		    map_start + map_len) {
+			if (map_token) {
+				unmap_extent_buffer(eb, map_token, KM_USER0);
+				map_token = NULL;
+			}
+			err = map_extent_buffer(eb, offset,
+						sizeof(struct btrfs_disk_key),
+						&map_token, &kaddr,
+						&map_start, &map_len, KM_USER0);
+
+			if (!err) {
+				tmp = (struct btrfs_disk_key *)(kaddr + offset -
+							map_start);
+			} else {
+				read_extent_buffer(eb, &unaligned,
+						   offset, sizeof(unaligned));
+				tmp = &unaligned;
+			}
+
+		} else {
+			tmp = (struct btrfs_disk_key *)(kaddr + offset -
+							map_start);
+		}
+		ret = comp_keys(tmp, key);
+
+		if (ret < 0)
+			low = mid + 1;
+		else if (ret > 0)
+			high = mid;
+		else {
+			*slot = mid;
+			if (map_token)
+				unmap_extent_buffer(eb, map_token, KM_USER0);
+			return 0;
+		}
+	}
+	*slot = low;
+	if (map_token)
+		unmap_extent_buffer(eb, map_token, KM_USER0);
+	return 1;
+}
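+
+/*
+ * note: this is a plain binary search over fixed size key records; the
+ * map_extent_buffer calls just give direct access to the page holding
+ * each key so most comparisons avoid copying into "unaligned".
+ */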
+
+/*
+ * simple bin_search frontend that does the right thing for
+ * leaves vs nodes
+ */
+static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+		      int level, int *slot)
+{
+	if (level == 0) {
+		return generic_bin_search(eb,
+					  offsetof(struct btrfs_leaf, items),
+					  sizeof(struct btrfs_item),
+					  key, btrfs_header_nritems(eb),
+					  slot);
+	} else {
+		return generic_bin_search(eb,
+					  offsetof(struct btrfs_node, ptrs),
+					  sizeof(struct btrfs_key_ptr),
+					  key, btrfs_header_nritems(eb),
+					  slot);
+	}
+	return -1;
+}
+
+/* given a node and slot number, this reads the block it points to.  The
+ * extent buffer is returned with a reference taken (but unlocked).
+ * NULL is returned on error.
+ */
+static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
+				   struct extent_buffer *parent, int slot)
+{
+	int level = btrfs_header_level(parent);
+	if (slot < 0)
+		return NULL;
+	if (slot >= btrfs_header_nritems(parent))
+		return NULL;
+
+	BUG_ON(level == 0);
+
+	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
+		       btrfs_level_size(root, level - 1),
+		       btrfs_node_ptr_generation(parent, slot));
+}
+
+/*
+ * node level balancing, used to make sure nodes are in proper order for
+ * item deletion.  We balance from the top down, so we have to make sure
+ * that a deletion won't leave a node completely empty later on.
+ */
+static noinline int balance_level(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct btrfs_path *path, int level)
+{
+	struct extent_buffer *right = NULL;
+	struct extent_buffer *mid;
+	struct extent_buffer *left = NULL;
+	struct extent_buffer *parent = NULL;
+	int ret = 0;
+	int wret;
+	int pslot;
+	int orig_slot = path->slots[level];
+	int err_on_enospc = 0;
+	u64 orig_ptr;
+
+	if (level == 0)
+		return 0;
+
+	mid = path->nodes[level];
+	WARN_ON(!path->locks[level]);
+	WARN_ON(btrfs_header_generation(mid) != trans->transid);
+
+	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+	if (level < BTRFS_MAX_LEVEL - 1)
+		parent = path->nodes[level + 1];
+	pslot = path->slots[level + 1];
+
+	/*
+	 * deal with the case where there is only one pointer in the root
+	 * by promoting the node below to a root
+	 */
+	if (!parent) {
+		struct extent_buffer *child;
+
+		if (btrfs_header_nritems(mid) != 1)
+			return 0;
+
+		/* promote the child to a root */
+		child = read_node_slot(root, mid, 0);
+		BUG_ON(!child);
+		btrfs_tree_lock(child);
+		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+		BUG_ON(ret);
+
+		spin_lock(&root->node_lock);
+		root->node = child;
+		spin_unlock(&root->node_lock);
+
+		ret = btrfs_update_extent_ref(trans, root, child->start,
+					      mid->start, child->start,
+					      root->root_key.objectid,
+					      trans->transid, level - 1);
+		BUG_ON(ret);
+
+		add_root_to_dirty_list(root);
+		btrfs_tree_unlock(child);
+		path->locks[level] = 0;
+		path->nodes[level] = NULL;
+		clean_tree_block(trans, root, mid);
+		btrfs_tree_unlock(mid);
+		/* once for the path */
+		free_extent_buffer(mid);
+		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
+					mid->start, root->root_key.objectid,
+					btrfs_header_generation(mid),
+					level, 1);
+		/* once for the root ptr */
+		free_extent_buffer(mid);
+		return ret;
+	}
+	if (btrfs_header_nritems(mid) >
+	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
+		return 0;
+
+	if (btrfs_header_nritems(mid) < 2)
+		err_on_enospc = 1;
+
+	left = read_node_slot(root, parent, pslot - 1);
+	if (left) {
+		btrfs_tree_lock(left);
+		wret = btrfs_cow_block(trans, root, left,
+				       parent, pslot - 1, &left, 0);
+		if (wret) {
+			ret = wret;
+			goto enospc;
+		}
+	}
+	right = read_node_slot(root, parent, pslot + 1);
+	if (right) {
+		btrfs_tree_lock(right);
+		wret = btrfs_cow_block(trans, root, right,
+				       parent, pslot + 1, &right, 0);
+		if (wret) {
+			ret = wret;
+			goto enospc;
+		}
+	}
+
+	/* first, try to make some room in the middle buffer */
+	if (left) {
+		orig_slot += btrfs_header_nritems(left);
+		wret = push_node_left(trans, root, left, mid, 1);
+		if (wret < 0)
+			ret = wret;
+		if (btrfs_header_nritems(mid) < 2)
+			err_on_enospc = 1;
+	}
+
+	/*
+	 * then try to empty the right most buffer into the middle
+	 */
+	if (right) {
+		wret = push_node_left(trans, root, mid, right, 1);
+		if (wret < 0 && wret != -ENOSPC)
+			ret = wret;
+		if (btrfs_header_nritems(right) == 0) {
+			u64 bytenr = right->start;
+			u64 generation = btrfs_header_generation(parent);
+			u32 blocksize = right->len;
+
+			clean_tree_block(trans, root, right);
+			btrfs_tree_unlock(right);
+			free_extent_buffer(right);
+			right = NULL;
+			wret = del_ptr(trans, root, path, level + 1, pslot +
+				       1);
+			if (wret)
+				ret = wret;
+			wret = btrfs_free_extent(trans, root, bytenr,
+						 blocksize, parent->start,
+						 btrfs_header_owner(parent),
+						 generation, level, 1);
+			if (wret)
+				ret = wret;
+		} else {
+			struct btrfs_disk_key right_key;
+			btrfs_node_key(right, &right_key, 0);
+			btrfs_set_node_key(parent, &right_key, pslot + 1);
+			btrfs_mark_buffer_dirty(parent);
+		}
+	}
+	if (btrfs_header_nritems(mid) == 1) {
+		/*
+		 * we're not allowed to leave a node with one item in the
+		 * tree during a delete.  A deletion from lower in the tree
+		 * could try to delete the only pointer in this node.
+		 * So, pull some keys from the left.
+		 * There has to be a left pointer at this point because
+		 * otherwise we would have pulled some pointers from the
+		 * right
+		 */
+		BUG_ON(!left);
+		wret = balance_node_right(trans, root, mid, left);
+		if (wret < 0) {
+			ret = wret;
+			goto enospc;
+		}
+		if (wret == 1) {
+			wret = push_node_left(trans, root, left, mid, 1);
+			if (wret < 0)
+				ret = wret;
+		}
+		BUG_ON(wret == 1);
+	}
+	if (btrfs_header_nritems(mid) == 0) {
+		/* we've managed to empty the middle node, drop it */
+		u64 root_gen = btrfs_header_generation(parent);
+		u64 bytenr = mid->start;
+		u32 blocksize = mid->len;
+
+		clean_tree_block(trans, root, mid);
+		btrfs_tree_unlock(mid);
+		free_extent_buffer(mid);
+		mid = NULL;
+		wret = del_ptr(trans, root, path, level + 1, pslot);
+		if (wret)
+			ret = wret;
+		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
+					 parent->start,
+					 btrfs_header_owner(parent),
+					 root_gen, level, 1);
+		if (wret)
+			ret = wret;
+	} else {
+		/* update the parent key to reflect our changes */
+		struct btrfs_disk_key mid_key;
+		btrfs_node_key(mid, &mid_key, 0);
+		btrfs_set_node_key(parent, &mid_key, pslot);
+		btrfs_mark_buffer_dirty(parent);
+	}
+
+	/* update the path */
+	if (left) {
+		if (btrfs_header_nritems(left) > orig_slot) {
+			extent_buffer_get(left);
+			/* left was locked after cow */
+			path->nodes[level] = left;
+			path->slots[level + 1] -= 1;
+			path->slots[level] = orig_slot;
+			if (mid) {
+				btrfs_tree_unlock(mid);
+				free_extent_buffer(mid);
+			}
+		} else {
+			orig_slot -= btrfs_header_nritems(left);
+			path->slots[level] = orig_slot;
+		}
+	}
+	/* double check we haven't messed things up */
+	check_block(root, path, level);
+	if (orig_ptr !=
+	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
+		BUG();
+enospc:
+	if (right) {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+	if (left) {
+		if (path->nodes[level] != left)
+			btrfs_tree_unlock(left);
+		free_extent_buffer(left);
+	}
+	return ret;
+}
+
+/* Node balancing for insertion.  Here we only split or push nodes around
+ * when they are completely full.  This is also done top down, so we
+ * have to be pessimistic.
+ */
+static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path, int level)
+{
+	struct extent_buffer *right = NULL;
+	struct extent_buffer *mid;
+	struct extent_buffer *left = NULL;
+	struct extent_buffer *parent = NULL;
+	int ret = 0;
+	int wret;
+	int pslot;
+	int orig_slot = path->slots[level];
+	u64 orig_ptr;
+
+	if (level == 0)
+		return 1;
+
+	mid = path->nodes[level];
+	WARN_ON(btrfs_header_generation(mid) != trans->transid);
+	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
+
+	if (level < BTRFS_MAX_LEVEL - 1)
+		parent = path->nodes[level + 1];
+	pslot = path->slots[level + 1];
+
+	if (!parent)
+		return 1;
+
+	left = read_node_slot(root, parent, pslot - 1);
+
+	/* first, try to make some room in the middle buffer */
+	if (left) {
+		u32 left_nr;
+
+		btrfs_tree_lock(left);
+		left_nr = btrfs_header_nritems(left);
+		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+			wret = 1;
+		} else {
+			ret = btrfs_cow_block(trans, root, left, parent,
+					      pslot - 1, &left, 0);
+			if (ret)
+				wret = 1;
+			else {
+				wret = push_node_left(trans, root,
+						      left, mid, 0);
+			}
+		}
+		if (wret < 0)
+			ret = wret;
+		if (wret == 0) {
+			struct btrfs_disk_key disk_key;
+			orig_slot += left_nr;
+			btrfs_node_key(mid, &disk_key, 0);
+			btrfs_set_node_key(parent, &disk_key, pslot);
+			btrfs_mark_buffer_dirty(parent);
+			if (btrfs_header_nritems(left) > orig_slot) {
+				path->nodes[level] = left;
+				path->slots[level + 1] -= 1;
+				path->slots[level] = orig_slot;
+				btrfs_tree_unlock(mid);
+				free_extent_buffer(mid);
+			} else {
+				orig_slot -=
+					btrfs_header_nritems(left);
+				path->slots[level] = orig_slot;
+				btrfs_tree_unlock(left);
+				free_extent_buffer(left);
+			}
+			return 0;
+		}
+		btrfs_tree_unlock(left);
+		free_extent_buffer(left);
+	}
+	right = read_node_slot(root, parent, pslot + 1);
+
+	/*
+	 * then try to empty the right most buffer into the middle
+	 */
+	if (right) {
+		u32 right_nr;
+		btrfs_tree_lock(right);
+		right_nr = btrfs_header_nritems(right);
+		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
+			wret = 1;
+		} else {
+			ret = btrfs_cow_block(trans, root, right,
+					      parent, pslot + 1,
+					      &right, 0);
+			if (ret)
+				wret = 1;
+			else {
+				wret = balance_node_right(trans, root,
+							  right, mid);
+			}
+		}
+		if (wret < 0)
+			ret = wret;
+		if (wret == 0) {
+			struct btrfs_disk_key disk_key;
+
+			btrfs_node_key(right, &disk_key, 0);
+			btrfs_set_node_key(parent, &disk_key, pslot + 1);
+			btrfs_mark_buffer_dirty(parent);
+
+			if (btrfs_header_nritems(mid) <= orig_slot) {
+				path->nodes[level] = right;
+				path->slots[level + 1] += 1;
+				path->slots[level] = orig_slot -
+					btrfs_header_nritems(mid);
+				btrfs_tree_unlock(mid);
+				free_extent_buffer(mid);
+			} else {
+				btrfs_tree_unlock(right);
+				free_extent_buffer(right);
+			}
+			return 0;
+		}
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+	return 1;
+}
+
+/*
+ * readahead one full node of leaves, finding things that are close
+ * to the block in 'slot', and triggering ra on them.
+ */
+static noinline void reada_for_search(struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      int level, int slot, u64 objectid)
+{
+	struct extent_buffer *node;
+	struct btrfs_disk_key disk_key;
+	u32 nritems;
+	u64 search;
+	u64 lowest_read;
+	u64 highest_read;
+	u64 nread = 0;
+	int direction = path->reada;
+	struct extent_buffer *eb;
+	u32 nr;
+	u32 blocksize;
+	u32 nscan = 0;
+
+	if (level != 1)
+		return;
+
+	if (!path->nodes[level])
+		return;
+
+	node = path->nodes[level];
+
+	search = btrfs_node_blockptr(node, slot);
+	blocksize = btrfs_level_size(root, level - 1);
+	eb = btrfs_find_tree_block(root, search, blocksize);
+	if (eb) {
+		free_extent_buffer(eb);
+		return;
+	}
+
+	highest_read = search;
+	lowest_read = search;
+
+	nritems = btrfs_header_nritems(node);
+	nr = slot;
+	while(1) {
+		if (direction < 0) {
+			if (nr == 0)
+				break;
+			nr--;
+		} else if (direction > 0) {
+			nr++;
+			if (nr >= nritems)
+				break;
+		}
+		if (path->reada < 0 && objectid) {
+			btrfs_node_key(node, &disk_key, nr);
+			if (btrfs_disk_key_objectid(&disk_key) != objectid)
+				break;
+		}
+		search = btrfs_node_blockptr(node, nr);
+		if ((search >= lowest_read && search <= highest_read) ||
+		    (search < lowest_read && lowest_read - search <= 32768) ||
+		    (search > highest_read && search - highest_read <= 32768)) {
+			readahead_tree_block(root, search, blocksize,
+				     btrfs_node_ptr_generation(node, nr));
+			nread += blocksize;
+		}
+		nscan++;
+		if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
+			break;
+		if (nread > (1024 * 1024) || nscan > 128)
+			break;
+
+		if (search < lowest_read)
+			lowest_read = search;
+		if (search > highest_read)
+			highest_read = search;
+	}
+}
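+
+/*
+ * the readahead above only follows pointers within 32K of what has
+ * already been queued, and stops after roughly 256K of readahead or 32
+ * scanned slots (1MB / 128 slots when more aggressive readahead was
+ * requested), so a single search never triggers unbounded I/O.
+ */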
+
+/*
+ * when we walk down the tree, it is usually safe to unlock the higher
+ * layers in the tree.  The exceptions are when our path goes through
+ * slot 0, because operations on the tree might require changing key
+ * pointers higher up in the tree.
+ *
+ * callers might also have set path->keep_locks, which tells this code to
+ * keep the lock if the path points to the last slot in the block.  This is
+ * part of walking through the tree, and selecting the next slot in the higher
+ * block.
+ *
+ * lowest_unlock sets the lowest level in the tree we're allowed to unlock.
+ * so if lowest_unlock is 1, level 0 won't be unlocked
+ */
+static noinline void unlock_up(struct btrfs_path *path, int level,
+			       int lowest_unlock)
+{
+	int i;
+	int skip_level = level;
+	int no_skips = 0;
+	struct extent_buffer *t;
+
+	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+		if (!path->nodes[i])
+			break;
+		if (!path->locks[i])
+			break;
+		if (!no_skips && path->slots[i] == 0) {
+			skip_level = i + 1;
+			continue;
+		}
+		if (!no_skips && path->keep_locks) {
+			u32 nritems;
+			t = path->nodes[i];
+			nritems = btrfs_header_nritems(t);
+			if (nritems < 1 || path->slots[i] >= nritems - 1) {
+				skip_level = i + 1;
+				continue;
+			}
+		}
+		if (skip_level < i && i >= lowest_unlock)
+			no_skips = 1;
+
+		t = path->nodes[i];
+		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+			btrfs_tree_unlock(t);
+			path->locks[i] = 0;
+		}
+	}
+}
+
+/*
+ * look for key in the tree.  path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned.  If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_path *p, int
+		      ins_len, int cow)
+{
+	struct extent_buffer *b;
+	struct extent_buffer *tmp;
+	int slot;
+	int ret;
+	int level;
+	int should_reada = p->reada;
+	int lowest_unlock = 1;
+	int blocksize;
+	u8 lowest_level = 0;
+	u64 blocknr;
+	u64 gen;
+	struct btrfs_key prealloc_block;
+
+	lowest_level = p->lowest_level;
+	WARN_ON(lowest_level && ins_len > 0);
+	WARN_ON(p->nodes[0] != NULL);
+	WARN_ON(cow && root == root->fs_info->extent_root &&
+		!mutex_is_locked(&root->fs_info->alloc_mutex));
+	if (ins_len < 0)
+		lowest_unlock = 2;
+
+	prealloc_block.objectid = 0;
+
+again:
+	if (p->skip_locking)
+		b = btrfs_root_node(root);
+	else
+		b = btrfs_lock_root_node(root);
+
+	while (b) {
+		level = btrfs_header_level(b);
+
+		/*
+		 * setup the path here so we can release it under lock
+		 * contention with the cow code
+		 */
+		p->nodes[level] = b;
+		if (!p->skip_locking)
+			p->locks[level] = 1;
+
+		if (cow) {
+			int wret;
+
+			/* is a cow on this block not required */
+			spin_lock(&root->fs_info->hash_lock);
+			if (btrfs_header_generation(b) == trans->transid &&
+			    btrfs_header_owner(b) == root->root_key.objectid &&
+			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
+				spin_unlock(&root->fs_info->hash_lock);
+				goto cow_done;
+			}
+			spin_unlock(&root->fs_info->hash_lock);
+
+			/* ok, we have to cow, is our old prealloc the right
+			 * size?
+			 */
+			if (prealloc_block.objectid &&
+			    prealloc_block.offset != b->len) {
+				btrfs_free_reserved_extent(root,
+					   prealloc_block.objectid,
+					   prealloc_block.offset);
+				prealloc_block.objectid = 0;
+			}
+
+			/*
+			 * for higher level blocks, try not to allocate blocks
+			 * with the block and the parent locks held.
+			 */
+			if (level > 1 && !prealloc_block.objectid &&
+			    btrfs_path_lock_waiting(p, level)) {
+				u32 size = b->len;
+				u64 hint = b->start;
+
+				btrfs_release_path(root, p);
+				ret = btrfs_reserve_extent(trans, root,
+							   size, size, 0,
+							   hint, (u64)-1,
+							   &prealloc_block, 0);
+				BUG_ON(ret);
+				goto again;
+			}
+
+			wret = btrfs_cow_block(trans, root, b,
+					       p->nodes[level + 1],
+					       p->slots[level + 1],
+					       &b, prealloc_block.objectid);
+			prealloc_block.objectid = 0;
+			if (wret) {
+				free_extent_buffer(b);
+				ret = wret;
+				goto done;
+			}
+		}
+cow_done:
+		BUG_ON(!cow && ins_len);
+		if (level != btrfs_header_level(b))
+			WARN_ON(1);
+		level = btrfs_header_level(b);
+
+		p->nodes[level] = b;
+		if (!p->skip_locking)
+			p->locks[level] = 1;
+
+		ret = check_block(root, p, level);
+		if (ret) {
+			ret = -1;
+			goto done;
+		}
+
+		ret = bin_search(b, key, level, &slot);
+		if (level != 0) {
+			if (ret && slot > 0)
+				slot -= 1;
+			p->slots[level] = slot;
+			if (ins_len > 0 && btrfs_header_nritems(b) >=
+			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+				int sret = split_node(trans, root, p, level);
+				BUG_ON(sret > 0);
+				if (sret) {
+					ret = sret;
+					goto done;
+				}
+				b = p->nodes[level];
+				slot = p->slots[level];
+			} else if (ins_len < 0) {
+				int sret = balance_level(trans, root, p,
+							 level);
+				if (sret) {
+					ret = sret;
+					goto done;
+				}
+				b = p->nodes[level];
+				if (!b) {
+					btrfs_release_path(NULL, p);
+					goto again;
+				}
+				slot = p->slots[level];
+				BUG_ON(btrfs_header_nritems(b) == 1);
+			}
+			unlock_up(p, level, lowest_unlock);
+
+			/* this is only true while dropping a snapshot */
+			if (level == lowest_level) {
+				ret = 0;
+				goto done;
+			}
+
+			blocknr = btrfs_node_blockptr(b, slot);
+			gen = btrfs_node_ptr_generation(b, slot);
+			blocksize = btrfs_level_size(root, level - 1);
+
+			tmp = btrfs_find_tree_block(root, blocknr, blocksize);
+			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+				b = tmp;
+			} else {
+				/*
+				 * reduce lock contention at high levels
+				 * of the btree by dropping locks before
+				 * we read.
+				 */
+				if (level > 1) {
+					btrfs_release_path(NULL, p);
+					if (tmp)
+						free_extent_buffer(tmp);
+					if (should_reada)
+						reada_for_search(root, p,
+								 level, slot,
+								 key->objectid);
+
+					tmp = read_tree_block(root, blocknr,
+							 blocksize, gen);
+					if (tmp)
+						free_extent_buffer(tmp);
+					goto again;
+				} else {
+					if (tmp)
+						free_extent_buffer(tmp);
+					if (should_reada)
+						reada_for_search(root, p,
+								 level, slot,
+								 key->objectid);
+					b = read_node_slot(root, b, slot);
+				}
+			}
+			if (!p->skip_locking)
+				btrfs_tree_lock(b);
+		} else {
+			p->slots[level] = slot;
+			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
+			    sizeof(struct btrfs_item) + ins_len) {
+				int sret = split_leaf(trans, root, key,
+						      p, ins_len, ret == 0);
+				BUG_ON(sret > 0);
+				if (sret) {
+					ret = sret;
+					goto done;
+				}
+			}
+			unlock_up(p, level, lowest_unlock);
+			goto done;
+		}
+	}
+	ret = 1;
+done:
+	if (prealloc_block.objectid) {
+		btrfs_free_reserved_extent(root,
+			   prealloc_block.objectid,
+			   prealloc_block.offset);
+	}
+
+	return ret;
+}
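+
+/*
+ * illustrative call patterns for btrfs_search_slot:
+ *
+ *	read-only lookup:     btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ *	cow for an update:    btrfs_search_slot(trans, root, &key, path, 0, 1);
+ *	prepare for deletion: btrfs_search_slot(trans, root, &key, path, -1, 1);
+ *
+ * on return, 0 means the key was found at path->slots[0] in
+ * path->nodes[0], 1 means it was not found and the path points at the
+ * insertion position, and < 0 is an error.
+ */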
+
+int btrfs_merge_path(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_key *node_keys,
+		     u64 *nodes, int lowest_level)
+{
+	struct extent_buffer *eb;
+	struct extent_buffer *parent;
+	struct btrfs_key key;
+	u64 bytenr;
+	u64 generation;
+	u32 blocksize;
+	int level;
+	int slot;
+	int key_match;
+	int ret;
+
+	eb = btrfs_lock_root_node(root);
+	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
+	BUG_ON(ret);
+
+	parent = eb;
+	while (1) {
+		level = btrfs_header_level(parent);
+		if (level == 0 || level <= lowest_level)
+			break;
+
+		ret = bin_search(parent, &node_keys[lowest_level], level,
+				 &slot);
+		if (ret && slot > 0)
+			slot--;
+
+		bytenr = btrfs_node_blockptr(parent, slot);
+		if (nodes[level - 1] == bytenr)
+			break;
+
+		blocksize = btrfs_level_size(root, level - 1);
+		generation = btrfs_node_ptr_generation(parent, slot);
+		btrfs_node_key_to_cpu(eb, &key, slot);
+		key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
+
+		/*
+		 * if node keys match and node pointer hasn't been modified
+		 * in the running transaction, we can merge the path. for
+		 * reloc trees, the node pointer check is skipped, this is
+		 * because the reloc trees are fully controlled by the space
+		 * balance code, no one else can modify them.
+		 */
+		if (!nodes[level - 1] || !key_match ||
+		    (generation == trans->transid &&
+		     root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) {
+next_level:
+			if (level == 1 || level == lowest_level + 1)
+				break;
+
+			eb = read_tree_block(root, bytenr, blocksize,
+					     generation);
+			btrfs_tree_lock(eb);
+
+			ret = btrfs_cow_block(trans, root, eb, parent, slot,
+					      &eb, 0);
+			BUG_ON(ret);
+
+			btrfs_tree_unlock(parent);
+			free_extent_buffer(parent);
+			parent = eb;
+			continue;
+		}
+
+		if (generation == trans->transid) {
+			u32 refs;
+			BUG_ON(btrfs_header_owner(eb) !=
+			       BTRFS_TREE_RELOC_OBJECTID);
+			/*
+			 * lock the block to keep __btrfs_cow_block from
+			 * changing the reference count.
+			 */
+			eb = read_tree_block(root, bytenr, blocksize,
+					     generation);
+			btrfs_tree_lock(eb);
+
+			ret = btrfs_lookup_extent_ref(trans, root, bytenr,
+						      blocksize, &refs);
+			BUG_ON(ret);
+			/*
+			 * if replace block whose reference count is one,
+			 * we have to "drop the subtree". so skip it for
+			 * simplicity
+			 */
+			if (refs == 1) {
+				btrfs_tree_unlock(eb);
+				free_extent_buffer(eb);
+				goto next_level;
+			}
+		}
+
+		btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
+		btrfs_set_node_ptr_generation(parent, slot, trans->transid);
+		btrfs_mark_buffer_dirty(parent);
+
+		ret = btrfs_inc_extent_ref(trans, root,
+					nodes[level - 1],
+					blocksize, parent->start,
+					btrfs_header_owner(parent),
+					btrfs_header_generation(parent),
+					level - 1);
+		BUG_ON(ret);
+		ret = btrfs_free_extent(trans, root, bytenr,
+					blocksize, parent->start,
+					btrfs_header_owner(parent),
+					btrfs_header_generation(parent),
+					level - 1, 1);
+		BUG_ON(ret);
+
+		if (generation == trans->transid) {
+			btrfs_tree_unlock(eb);
+			free_extent_buffer(eb);
+		}
+		break;
+	}
+	btrfs_tree_unlock(parent);
+	free_extent_buffer(parent);
+	return 0;
+}
+
+/*
+ * adjust the pointers going up the tree, starting at level
+ * making sure the right key of each node points to 'key'.
+ * This is used after shifting pointers to the left, so it stops
+ * fixing up pointers when a given leaf/node is not in slot 0 of the
+ * higher levels
+ *
+ * If this fails to write a tree block, it returns -1, but continues
+ * fixing up the blocks in ram so the tree is consistent.
+ */
+static int fixup_low_keys(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct btrfs_path *path,
+			  struct btrfs_disk_key *key, int level)
+{
+	int i;
+	int ret = 0;
+	struct extent_buffer *t;
+
+	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+		int tslot = path->slots[i];
+		if (!path->nodes[i])
+			break;
+		t = path->nodes[i];
+		btrfs_set_node_key(t, key, tslot);
+		btrfs_mark_buffer_dirty(path->nodes[i]);
+		if (tslot != 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * update item key.
+ *
+ * This function isn't completely safe. It's the caller's responsibility
+ * that the new key won't break the order
+ */
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, struct btrfs_path *path,
+			    struct btrfs_key *new_key)
+{
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *eb;
+	int slot;
+
+	eb = path->nodes[0];
+	slot = path->slots[0];
+	if (slot > 0) {
+		btrfs_item_key(eb, &disk_key, slot - 1);
+		if (comp_keys(&disk_key, new_key) >= 0)
+			return -1;
+	}
+	if (slot < btrfs_header_nritems(eb) - 1) {
+		btrfs_item_key(eb, &disk_key, slot + 1);
+		if (comp_keys(&disk_key, new_key) <= 0)
+			return -1;
+	}
+
+	btrfs_cpu_key_to_disk(&disk_key, new_key);
+	btrfs_set_item_key(eb, &disk_key, slot);
+	btrfs_mark_buffer_dirty(eb);
+	if (slot == 0)
+		fixup_low_keys(trans, root, path, &disk_key, 1);
+	return 0;
+}
+
+/*
+ * try to push data from one node into the next node left in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the left hand block.
+ */
+static int push_node_left(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct extent_buffer *dst,
+			  struct extent_buffer *src, int empty)
+{
+	int push_items = 0;
+	int src_nritems;
+	int dst_nritems;
+	int ret = 0;
+
+	src_nritems = btrfs_header_nritems(src);
+	dst_nritems = btrfs_header_nritems(dst);
+	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
+	WARN_ON(btrfs_header_generation(src) != trans->transid);
+	WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+	if (!empty && src_nritems <= 8)
+		return 1;
+
+	if (push_items <= 0) {
+		return 1;
+	}
+
+	if (empty) {
+		push_items = min(src_nritems, push_items);
+		if (push_items < src_nritems) {
+			/* leave at least 8 pointers in the node if
+			 * we aren't going to empty it
+			 */
+			if (src_nritems - push_items < 8) {
+				if (push_items <= 8)
+					return 1;
+				push_items -= 8;
+			}
+		}
+	} else
+		push_items = min(src_nritems - 8, push_items);
+
+	copy_extent_buffer(dst, src,
+			   btrfs_node_key_ptr_offset(dst_nritems),
+			   btrfs_node_key_ptr_offset(0),
+		           push_items * sizeof(struct btrfs_key_ptr));
+
+	if (push_items < src_nritems) {
+		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
+				      btrfs_node_key_ptr_offset(push_items),
+				      (src_nritems - push_items) *
+				      sizeof(struct btrfs_key_ptr));
+	}
+	btrfs_set_header_nritems(src, src_nritems - push_items);
+	btrfs_set_header_nritems(dst, dst_nritems + push_items);
+	btrfs_mark_buffer_dirty(src);
+	btrfs_mark_buffer_dirty(dst);
+
+	ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
+	BUG_ON(ret);
+
+	return ret;
+}
+
+/*
+ * try to push data from one node into the next node right in the
+ * tree.
+ *
+ * returns 0 if some ptrs were pushed, < 0 if there was some horrible
+ * error, and > 0 if there was no room in the right hand block.
+ *
+ * this will only push up to 1/2 the contents of the left node over
+ */
+static int balance_node_right(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct extent_buffer *dst,
+			      struct extent_buffer *src)
+{
+	int push_items = 0;
+	int max_push;
+	int src_nritems;
+	int dst_nritems;
+	int ret = 0;
+
+	WARN_ON(btrfs_header_generation(src) != trans->transid);
+	WARN_ON(btrfs_header_generation(dst) != trans->transid);
+
+	src_nritems = btrfs_header_nritems(src);
+	dst_nritems = btrfs_header_nritems(dst);
+	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
+	if (push_items <= 0) {
+		return 1;
+	}
+
+	if (src_nritems < 4) {
+		return 1;
+	}
+
+	max_push = src_nritems / 2 + 1;
+	/* don't try to empty the node */
+	if (max_push >= src_nritems) {
+		return 1;
+	}
+
+	if (max_push < push_items)
+		push_items = max_push;
+
+	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
+				      btrfs_node_key_ptr_offset(0),
+				      (dst_nritems) *
+				      sizeof(struct btrfs_key_ptr));
+
+	copy_extent_buffer(dst, src,
+			   btrfs_node_key_ptr_offset(0),
+			   btrfs_node_key_ptr_offset(src_nritems - push_items),
+		           push_items * sizeof(struct btrfs_key_ptr));
+
+	btrfs_set_header_nritems(src, src_nritems - push_items);
+	btrfs_set_header_nritems(dst, dst_nritems + push_items);
+
+	btrfs_mark_buffer_dirty(src);
+	btrfs_mark_buffer_dirty(dst);
+
+	ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
+	BUG_ON(ret);
+
+	return ret;
+}
+
+/*
+ * helper function to insert a new root level in the tree.
+ * A new node is allocated, and a single item is inserted to
+ * point to the existing root
+ *
+ * returns zero on success or < 0 on failure.
+ */
+static int noinline insert_new_root(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_path *path, int level)
+{
+	u64 lower_gen;
+	struct extent_buffer *lower;
+	struct extent_buffer *c;
+	struct extent_buffer *old;
+	struct btrfs_disk_key lower_key;
+	int ret;
+
+	BUG_ON(path->nodes[level]);
+	BUG_ON(path->nodes[level-1] != root->node);
+
+	lower = path->nodes[level-1];
+	if (level == 1)
+		btrfs_item_key(lower, &lower_key, 0);
+	else
+		btrfs_node_key(lower, &lower_key, 0);
+
+	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
+				   root->root_key.objectid, trans->transid,
+				   level, root->node->start, 0);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+
+	memset_extent_buffer(c, 0, 0, root->nodesize);
+	btrfs_set_header_nritems(c, 1);
+	btrfs_set_header_level(c, level);
+	btrfs_set_header_bytenr(c, c->start);
+	btrfs_set_header_generation(c, trans->transid);
+	btrfs_set_header_owner(c, root->root_key.objectid);
+
+	write_extent_buffer(c, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(c),
+			    BTRFS_FSID_SIZE);
+
+	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
+			    BTRFS_UUID_SIZE);
+
+	btrfs_set_node_key(c, &lower_key, 0);
+	btrfs_set_node_blockptr(c, 0, lower->start);
+	lower_gen = btrfs_header_generation(lower);
+	WARN_ON(lower_gen != trans->transid);
+
+	btrfs_set_node_ptr_generation(c, 0, lower_gen);
+
+	btrfs_mark_buffer_dirty(c);
+
+	spin_lock(&root->node_lock);
+	old = root->node;
+	root->node = c;
+	spin_unlock(&root->node_lock);
+
+	ret = btrfs_update_extent_ref(trans, root, lower->start,
+				      lower->start, c->start,
+				      root->root_key.objectid,
+				      trans->transid, level - 1);
+	BUG_ON(ret);
+
+	/* the super has an extra ref to root->node */
+	free_extent_buffer(old);
+
+	add_root_to_dirty_list(root);
+	extent_buffer_get(c);
+	path->nodes[level] = c;
+	path->locks[level] = 1;
+	path->slots[level] = 0;
+	return 0;
+}
+
+/*
+ * worker function to insert a single pointer in a node.
+ * the node should have enough room for the pointer already
+ *
+ * slot and level indicate where you want the key to go, and
+ * blocknr is the block the key points to.
+ *
+ * returns zero on success and < 0 on any error
+ */
+static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, struct btrfs_disk_key
+		      *key, u64 bytenr, int slot, int level)
+{
+	struct extent_buffer *lower;
+	int nritems;
+
+	BUG_ON(!path->nodes[level]);
+	lower = path->nodes[level];
+	nritems = btrfs_header_nritems(lower);
+	if (slot > nritems)
+		BUG();
+	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
+		BUG();
+	if (slot != nritems) {
+		memmove_extent_buffer(lower,
+			      btrfs_node_key_ptr_offset(slot + 1),
+			      btrfs_node_key_ptr_offset(slot),
+			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
+	}
+	btrfs_set_node_key(lower, key, slot);
+	btrfs_set_node_blockptr(lower, slot, bytenr);
+	WARN_ON(trans->transid == 0);
+	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
+	btrfs_set_header_nritems(lower, nritems + 1);
+	btrfs_mark_buffer_dirty(lower);
+	return 0;
+}
+
+/*
+ * split the node at the specified level in path in two.
+ * The path is corrected to point to the appropriate node after the split
+ *
+ * Before splitting this tries to make some room in the node by pushing
+ * left and right, if either one works, it returns right away.
+ *
+ * returns 0 on success and < 0 on failure
+ */
+static noinline int split_node(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path, int level)
+{
+	struct extent_buffer *c;
+	struct extent_buffer *split;
+	struct btrfs_disk_key disk_key;
+	int mid;
+	int ret;
+	int wret;
+	u32 c_nritems;
+
+	c = path->nodes[level];
+	WARN_ON(btrfs_header_generation(c) != trans->transid);
+	if (c == root->node) {
+		/* trying to split the root, lets make a new one */
+		ret = insert_new_root(trans, root, path, level + 1);
+		if (ret)
+			return ret;
+	} else {
+		ret = push_nodes_for_insert(trans, root, path, level);
+		c = path->nodes[level];
+		if (!ret && btrfs_header_nritems(c) <
+		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
+			return 0;
+		if (ret < 0)
+			return ret;
+	}
+
+	c_nritems = btrfs_header_nritems(c);
+
+	split = btrfs_alloc_free_block(trans, root, root->nodesize,
+					path->nodes[level + 1]->start,
+					root->root_key.objectid,
+					trans->transid, level, c->start, 0);
+	if (IS_ERR(split))
+		return PTR_ERR(split);
+
+	btrfs_set_header_flags(split, btrfs_header_flags(c));
+	btrfs_set_header_level(split, btrfs_header_level(c));
+	btrfs_set_header_bytenr(split, split->start);
+	btrfs_set_header_generation(split, trans->transid);
+	btrfs_set_header_owner(split, root->root_key.objectid);
+	btrfs_set_header_flags(split, 0);
+	write_extent_buffer(split, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(split),
+			    BTRFS_FSID_SIZE);
+	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
+			    BTRFS_UUID_SIZE);
+
+	mid = (c_nritems + 1) / 2;
+
+	copy_extent_buffer(split, c,
+			   btrfs_node_key_ptr_offset(0),
+			   btrfs_node_key_ptr_offset(mid),
+			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
+	btrfs_set_header_nritems(split, c_nritems - mid);
+	btrfs_set_header_nritems(c, mid);
+	ret = 0;
+
+	btrfs_mark_buffer_dirty(c);
+	btrfs_mark_buffer_dirty(split);
+
+	btrfs_node_key(split, &disk_key, 0);
+	wret = insert_ptr(trans, root, path, &disk_key, split->start,
+			  path->slots[level + 1] + 1,
+			  level + 1);
+	if (wret)
+		ret = wret;
+
+	ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
+	BUG_ON(ret);
+
+	if (path->slots[level] >= mid) {
+		path->slots[level] -= mid;
+		btrfs_tree_unlock(c);
+		free_extent_buffer(c);
+		path->nodes[level] = split;
+		path->slots[level + 1] += 1;
+	} else {
+		btrfs_tree_unlock(split);
+		free_extent_buffer(split);
+	}
+	return ret;
+}
+
+/*
+ * how many bytes are required to store the items in a leaf.  start
+ * and nr indicate which items in the leaf to check.  This totals up the
+ * space used both by the item structs and the item data
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
+{
+	int data_len;
+	int nritems = btrfs_header_nritems(l);
+	int end = min(nritems, start + nr) - 1;
+
+	if (!nr)
+		return 0;
+	data_len = btrfs_item_end_nr(l, start);
+	data_len = data_len - btrfs_item_offset_nr(l, end);
+	data_len += sizeof(struct btrfs_item) * nr;
+	WARN_ON(data_len < 0);
+	return data_len;
+}
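+
+/*
+ * for example, two adjacent items carrying 100 bytes of data each use
+ * 200 bytes of data plus 2 * sizeof(struct btrfs_item) of header space.
+ */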
+
+/*
+ * The space between the end of the leaf items and
+ * the start of the leaf data.  IOW, how much room
+ * the leaf has left for both items and data
+ */
+int noinline btrfs_leaf_free_space(struct btrfs_root *root,
+				   struct extent_buffer *leaf)
+{
+	int nritems = btrfs_header_nritems(leaf);
+	int ret;
+	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
+	if (ret < 0) {
+		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
+		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
+		       leaf_space_used(leaf, 0, nritems), nritems);
+	}
+	return ret;
+}
+
+/*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+			   *root, struct btrfs_path *path, int data_size,
+			   int empty)
+{
+	struct extent_buffer *left = path->nodes[0];
+	struct extent_buffer *right;
+	struct extent_buffer *upper;
+	struct btrfs_disk_key disk_key;
+	int slot;
+	u32 i;
+	int free_space;
+	int push_space = 0;
+	int push_items = 0;
+	struct btrfs_item *item;
+	u32 left_nritems;
+	u32 nr;
+	u32 right_nritems;
+	u32 data_end;
+	u32 this_item_size;
+	int ret;
+
+	slot = path->slots[1];
+	if (!path->nodes[1]) {
+		return 1;
+	}
+	upper = path->nodes[1];
+	if (slot >= btrfs_header_nritems(upper) - 1)
+		return 1;
+
+	WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
+	right = read_node_slot(root, upper, slot + 1);
+	btrfs_tree_lock(right);
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size + sizeof(struct btrfs_item))
+		goto out_unlock;
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, right, upper,
+			      slot + 1, &right, 0);
+	if (ret)
+		goto out_unlock;
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size + sizeof(struct btrfs_item))
+		goto out_unlock;
+
+	left_nritems = btrfs_header_nritems(left);
+	if (left_nritems == 0)
+		goto out_unlock;
+
+	if (empty)
+		nr = 0;
+	else
+		nr = 1;
+
+	if (path->slots[0] >= left_nritems)
+		push_space += data_size + sizeof(*item);
+
+	i = left_nritems - 1;
+	while (i >= nr) {
+		item = btrfs_item_nr(left, i);
+
+		if (!empty && push_items > 0) {
+			if (path->slots[0] > i)
+				break;
+			if (path->slots[0] == i) {
+				int space = btrfs_leaf_free_space(root, left);
+				if (space + push_space * 2 > free_space)
+					break;
+			}
+		}
+
+		if (path->slots[0] == i)
+			push_space += data_size + sizeof(*item);
+
+		if (!left->map_token) {
+			map_extent_buffer(left, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&left->map_token, &left->kaddr,
+					&left->map_start, &left->map_len,
+					KM_USER1);
+		}
+
+		this_item_size = btrfs_item_size(left, item);
+		if (this_item_size + sizeof(*item) + push_space > free_space)
+			break;
+
+		push_items++;
+		push_space += this_item_size + sizeof(*item);
+		if (i == 0)
+			break;
+		i--;
+	}
+	if (left->map_token) {
+		unmap_extent_buffer(left, left->map_token, KM_USER1);
+		left->map_token = NULL;
+	}
+
+	if (push_items == 0)
+		goto out_unlock;
+
+	if (!empty && push_items == left_nritems)
+		WARN_ON(1);
+
+	/* push left to right */
+	right_nritems = btrfs_header_nritems(right);
+
+	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
+	push_space -= leaf_data_end(root, left);
+
+	/* make room in the right data area */
+	data_end = leaf_data_end(root, right);
+	memmove_extent_buffer(right,
+			      btrfs_leaf_data(right) + data_end - push_space,
+			      btrfs_leaf_data(right) + data_end,
+			      BTRFS_LEAF_DATA_SIZE(root) - data_end);
+
+	/* copy from the left data area */
+	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
+		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
+		     btrfs_leaf_data(left) + leaf_data_end(root, left),
+		     push_space);
+
+	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
+			      btrfs_item_nr_offset(0),
+			      right_nritems * sizeof(struct btrfs_item));
+
+	/* copy the items from left to right */
+	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
+		   btrfs_item_nr_offset(left_nritems - push_items),
+		   push_items * sizeof(struct btrfs_item));
+
+	/* update the item pointers */
+	right_nritems += push_items;
+	btrfs_set_header_nritems(right, right_nritems);
+	push_space = BTRFS_LEAF_DATA_SIZE(root);
+	for (i = 0; i < right_nritems; i++) {
+		item = btrfs_item_nr(right, i);
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+		push_space -= btrfs_item_size(right, item);
+		btrfs_set_item_offset(right, item, push_space);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+	left_nritems -= push_items;
+	btrfs_set_header_nritems(left, left_nritems);
+
+	if (left_nritems)
+		btrfs_mark_buffer_dirty(left);
+	btrfs_mark_buffer_dirty(right);
+
+	ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
+	BUG_ON(ret);
+
+	btrfs_item_key(right, &disk_key, 0);
+	btrfs_set_node_key(upper, &disk_key, slot + 1);
+	btrfs_mark_buffer_dirty(upper);
+
+	/* then fixup the leaf pointer in the path */
+	if (path->slots[0] >= left_nritems) {
+		path->slots[0] -= left_nritems;
+		if (btrfs_header_nritems(path->nodes[0]) == 0)
+			clean_tree_block(trans, root, path->nodes[0]);
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = right;
+		path->slots[1] += 1;
+	} else {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+	return 0;
+
+out_unlock:
+	btrfs_tree_unlock(right);
+	free_extent_buffer(right);
+	return 1;
+}
+
+/*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, struct btrfs_path *path, int data_size,
+			  int empty)
+{
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *right = path->nodes[0];
+	struct extent_buffer *left;
+	int slot;
+	int i;
+	int free_space;
+	int push_space = 0;
+	int push_items = 0;
+	struct btrfs_item *item;
+	u32 old_left_nritems;
+	u32 right_nritems;
+	u32 nr;
+	int ret = 0;
+	int wret;
+	u32 this_item_size;
+	u32 old_left_item_size;
+
+	slot = path->slots[1];
+	if (slot == 0)
+		return 1;
+	if (!path->nodes[1])
+		return 1;
+
+	right_nritems = btrfs_header_nritems(right);
+	if (right_nritems == 0) {
+		return 1;
+	}
+
+	WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+
+	left = read_node_slot(root, path->nodes[1], slot - 1);
+	btrfs_tree_lock(left);
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size + sizeof(struct btrfs_item)) {
+		ret = 1;
+		goto out;
+	}
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, left,
+			      path->nodes[1], slot - 1, &left, 0);
+	if (ret) {
+		/* we hit -ENOSPC, but it isn't fatal here */
+		ret = 1;
+		goto out;
+	}
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size + sizeof(struct btrfs_item)) {
+		ret = 1;
+		goto out;
+	}
+
+	if (empty)
+		nr = right_nritems;
+	else
+		nr = right_nritems - 1;
+
+	for (i = 0; i < nr; i++) {
+		item = btrfs_item_nr(right, i);
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		if (!empty && push_items > 0) {
+			if (path->slots[0] < i)
+				break;
+			if (path->slots[0] == i) {
+				int space = btrfs_leaf_free_space(root, right);
+				if (space + push_space * 2 > free_space)
+					break;
+			}
+		}
+
+		if (path->slots[0] == i)
+			push_space += data_size + sizeof(*item);
+
+		this_item_size = btrfs_item_size(right, item);
+		if (this_item_size + sizeof(*item) + push_space > free_space)
+			break;
+
+		push_items++;
+		push_space += this_item_size + sizeof(*item);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	if (push_items == 0) {
+		ret = 1;
+		goto out;
+	}
+	if (!empty && push_items == btrfs_header_nritems(right))
+		WARN_ON(1);
+
+	/* push data from right to left */
+	copy_extent_buffer(left, right,
+			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
+			   btrfs_item_nr_offset(0),
+			   push_items * sizeof(struct btrfs_item));
+
+	push_space = BTRFS_LEAF_DATA_SIZE(root) -
+		     btrfs_item_offset_nr(right, push_items - 1);
+
+	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
+		     leaf_data_end(root, left) - push_space,
+		     btrfs_leaf_data(right) +
+		     btrfs_item_offset_nr(right, push_items - 1),
+		     push_space);
+	old_left_nritems = btrfs_header_nritems(left);
+	BUG_ON(old_left_nritems < 0);
+
+	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
+	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
+		u32 ioff;
+
+		item = btrfs_item_nr(left, i);
+		if (!left->map_token) {
+			map_extent_buffer(left, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&left->map_token, &left->kaddr,
+					&left->map_start, &left->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(left, item);
+		btrfs_set_item_offset(left, item,
+		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+	}
+	btrfs_set_header_nritems(left, old_left_nritems + push_items);
+	if (left->map_token) {
+		unmap_extent_buffer(left, left->map_token, KM_USER1);
+		left->map_token = NULL;
+	}
+
+	/* fixup right node */
+	if (push_items > right_nritems) {
+		printk("push items %d nr %u\n", push_items, right_nritems);
+		WARN_ON(1);
+	}
+
+	if (push_items < right_nritems) {
+		push_space = btrfs_item_offset_nr(right, push_items - 1) -
+						  leaf_data_end(root, right);
+		memmove_extent_buffer(right, btrfs_leaf_data(right) +
+				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
+				      btrfs_leaf_data(right) +
+				      leaf_data_end(root, right), push_space);
+
+		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
+			      btrfs_item_nr_offset(push_items),
+			     (btrfs_header_nritems(right) - push_items) *
+			     sizeof(struct btrfs_item));
+	}
+	right_nritems -= push_items;
+	btrfs_set_header_nritems(right, right_nritems);
+	push_space = BTRFS_LEAF_DATA_SIZE(root);
+	for (i = 0; i < right_nritems; i++) {
+		item = btrfs_item_nr(right, i);
+
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		push_space = push_space - btrfs_item_size(right, item);
+		btrfs_set_item_offset(right, item, push_space);
+	}
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	btrfs_mark_buffer_dirty(left);
+	if (right_nritems)
+		btrfs_mark_buffer_dirty(right);
+
+	ret = btrfs_update_ref(trans, root, right, left,
+			       old_left_nritems, push_items);
+	BUG_ON(ret);
+
+	btrfs_item_key(right, &disk_key, 0);
+	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
+	if (wret)
+		ret = wret;
+
+	/* then fixup the leaf pointer in the path */
+	if (path->slots[0] < push_items) {
+		path->slots[0] += old_left_nritems;
+		if (btrfs_header_nritems(path->nodes[0]) == 0)
+			clean_tree_block(trans, root, path->nodes[0]);
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = left;
+		path->slots[1] -= 1;
+	} else {
+		btrfs_tree_unlock(left);
+		free_extent_buffer(left);
+		path->slots[0] -= push_items;
+	}
+	BUG_ON(path->slots[0] < 0);
+	return ret;
+out:
+	btrfs_tree_unlock(left);
+	free_extent_buffer(left);
+	return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
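+/*
+ * A rough feel for the split point (numbers illustrative): with 10 items
+ * the first guess is mid = (10 + 1) / 2 = 5, so items 5..9 move to the new
+ * right leaf.  If the half that will receive the new item still can't hold
+ * it, mid is moved to the insertion slot itself, and in the worst case the
+ * same leaf is split a second time (double_split).
+ */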
+static noinline int split_leaf(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_key *ins_key,
+			       struct btrfs_path *path, int data_size,
+			       int extend)
+{
+	struct extent_buffer *l;
+	u32 nritems;
+	int mid;
+	int slot;
+	struct extent_buffer *right;
+	int space_needed = data_size + sizeof(struct btrfs_item);
+	int data_copy_size;
+	int rt_data_off;
+	int i;
+	int ret = 0;
+	int wret;
+	int double_split;
+	int num_doubles = 0;
+	struct btrfs_disk_key disk_key;
+
+	if (extend)
+		space_needed = data_size;
+
+	/* first try to make some room by pushing left and right */
+	if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
+		wret = push_leaf_right(trans, root, path, data_size, 0);
+		if (wret < 0) {
+			return wret;
+		}
+		if (wret) {
+			wret = push_leaf_left(trans, root, path, data_size, 0);
+			if (wret < 0)
+				return wret;
+		}
+		l = path->nodes[0];
+
+		/* did the pushes work? */
+		if (btrfs_leaf_free_space(root, l) >= space_needed)
+			return 0;
+	}
+
+	if (!path->nodes[1]) {
+		ret = insert_new_root(trans, root, path, 1);
+		if (ret)
+			return ret;
+	}
+again:
+	double_split = 0;
+	l = path->nodes[0];
+	slot = path->slots[0];
+	nritems = btrfs_header_nritems(l);
+	mid = (nritems + 1) / 2;
+
+	right = btrfs_alloc_free_block(trans, root, root->leafsize,
+					path->nodes[1]->start,
+					root->root_key.objectid,
+					trans->transid, 0, l->start, 0);
+	if (IS_ERR(right)) {
+		BUG_ON(1);
+		return PTR_ERR(right);
+	}
+
+	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_bytenr(right, right->start);
+	btrfs_set_header_generation(right, trans->transid);
+	btrfs_set_header_owner(right, root->root_key.objectid);
+	btrfs_set_header_level(right, 0);
+	write_extent_buffer(right, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(right),
+			    BTRFS_FSID_SIZE);
+
+	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
+			    BTRFS_UUID_SIZE);
+	if (mid <= slot) {
+		if (nritems == 1 ||
+		    leaf_space_used(l, mid, nritems - mid) + space_needed >
+			BTRFS_LEAF_DATA_SIZE(root)) {
+			if (slot >= nritems) {
+				btrfs_cpu_key_to_disk(&disk_key, ins_key);
+				btrfs_set_header_nritems(right, 0);
+				wret = insert_ptr(trans, root, path,
+						  &disk_key, right->start,
+						  path->slots[1] + 1, 1);
+				if (wret)
+					ret = wret;
+
+				btrfs_tree_unlock(path->nodes[0]);
+				free_extent_buffer(path->nodes[0]);
+				path->nodes[0] = right;
+				path->slots[0] = 0;
+				path->slots[1] += 1;
+				btrfs_mark_buffer_dirty(right);
+				return ret;
+			}
+			mid = slot;
+			if (mid != nritems &&
+			    leaf_space_used(l, mid, nritems - mid) +
+			    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
+				double_split = 1;
+			}
+		}
+	} else {
+		if (leaf_space_used(l, 0, mid + 1) + space_needed >
+			BTRFS_LEAF_DATA_SIZE(root)) {
+			if (!extend && slot == 0) {
+				btrfs_cpu_key_to_disk(&disk_key, ins_key);
+				btrfs_set_header_nritems(right, 0);
+				wret = insert_ptr(trans, root, path,
+						  &disk_key,
+						  right->start,
+						  path->slots[1], 1);
+				if (wret)
+					ret = wret;
+				btrfs_tree_unlock(path->nodes[0]);
+				free_extent_buffer(path->nodes[0]);
+				path->nodes[0] = right;
+				path->slots[0] = 0;
+				if (path->slots[1] == 0) {
+					wret = fixup_low_keys(trans, root,
+					           path, &disk_key, 1);
+					if (wret)
+						ret = wret;
+				}
+				btrfs_mark_buffer_dirty(right);
+				return ret;
+			} else if (extend && slot == 0) {
+				mid = 1;
+			} else {
+				mid = slot;
+				if (mid != nritems &&
+				    leaf_space_used(l, mid, nritems - mid) +
+				    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
+					double_split = 1;
+				}
+			}
+		}
+	}
+	nritems = nritems - mid;
+	btrfs_set_header_nritems(right, nritems);
+	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+			   btrfs_item_nr_offset(mid),
+			   nritems * sizeof(struct btrfs_item));
+
+	copy_extent_buffer(right, l,
+		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+		     data_copy_size, btrfs_leaf_data(l) +
+		     leaf_data_end(root, l), data_copy_size);
+
+	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+		      btrfs_item_end_nr(l, mid);
+
+	for (i = 0; i < nritems; i++) {
+		struct btrfs_item *item = btrfs_item_nr(right, i);
+		u32 ioff;
+
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(right, item);
+		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	btrfs_set_header_nritems(l, mid);
+	ret = 0;
+	btrfs_item_key(right, &disk_key, 0);
+	wret = insert_ptr(trans, root, path, &disk_key, right->start,
+			  path->slots[1] + 1, 1);
+	if (wret)
+		ret = wret;
+
+	btrfs_mark_buffer_dirty(right);
+	btrfs_mark_buffer_dirty(l);
+	BUG_ON(path->slots[0] != slot);
+
+	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	BUG_ON(ret);
+
+	if (mid <= slot) {
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = right;
+		path->slots[0] -= mid;
+		path->slots[1] += 1;
+	} else {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+
+	BUG_ON(path->slots[0] < 0);
+
+	if (double_split) {
+		BUG_ON(num_doubles != 0);
+		num_doubles++;
+		goto again;
+	}
+	return ret;
+}
+
+/*
+ * make the item pointed to by the path smaller.  new_size indicates
+ * how small to make it, and from_end tells us if we just chop bytes
+ * off the end of the item or if we shift the item to chop bytes off
+ * the front.
+ */
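+/*
+ * For example (numbers illustrative): shrinking a 100 byte item to 60 with
+ * from_end set keeps the first 60 bytes and drops the last 40; with
+ * from_end clear the last 60 bytes are kept and the key's offset is bumped
+ * by 40 so it still describes the bytes that remain.
+ */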
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root,
+			struct btrfs_path *path,
+			u32 new_size, int from_end)
+{
+	int ret = 0;
+	int slot;
+	int slot_orig;
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+	u32 nritems;
+	unsigned int data_end;
+	unsigned int old_data_start;
+	unsigned int old_size;
+	unsigned int size_diff;
+	int i;
+
+	slot_orig = path->slots[0];
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+
+	old_size = btrfs_item_size_nr(leaf, slot);
+	if (old_size == new_size)
+		return 0;
+
+	nritems = btrfs_header_nritems(leaf);
+	data_end = leaf_data_end(root, leaf);
+
+	old_data_start = btrfs_item_offset_nr(leaf, slot);
+
+	size_diff = old_size - new_size;
+
+	BUG_ON(slot < 0);
+	BUG_ON(slot >= nritems);
+
+	/*
+	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
+	 */
+	/* first correct the data pointers */
+	for (i = slot; i < nritems; i++) {
+		u32 ioff;
+		item = btrfs_item_nr(leaf, i);
+
+		if (!leaf->map_token) {
+			map_extent_buffer(leaf, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&leaf->map_token, &leaf->kaddr,
+					&leaf->map_start, &leaf->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(leaf, item);
+		btrfs_set_item_offset(leaf, item, ioff + size_diff);
+	}
+
+	if (leaf->map_token) {
+		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+		leaf->map_token = NULL;
+	}
+
+	/* shift the data */
+	if (from_end) {
+		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+			      data_end + size_diff, btrfs_leaf_data(leaf) +
+			      data_end, old_data_start + new_size - data_end);
+	} else {
+		struct btrfs_disk_key disk_key;
+		u64 offset;
+
+		btrfs_item_key(leaf, &disk_key, slot);
+
+		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
+			unsigned long ptr;
+			struct btrfs_file_extent_item *fi;
+
+			fi = btrfs_item_ptr(leaf, slot,
+					    struct btrfs_file_extent_item);
+			fi = (struct btrfs_file_extent_item *)(
+			     (unsigned long)fi - size_diff);
+
+			if (btrfs_file_extent_type(leaf, fi) ==
+			    BTRFS_FILE_EXTENT_INLINE) {
+				ptr = btrfs_item_ptr_offset(leaf, slot);
+				memmove_extent_buffer(leaf, ptr,
+				        (unsigned long)fi,
+				        offsetof(struct btrfs_file_extent_item,
+						 disk_bytenr));
+			}
+		}
+
+		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+			      data_end + size_diff, btrfs_leaf_data(leaf) +
+			      data_end, old_data_start - data_end);
+
+		offset = btrfs_disk_key_offset(&disk_key);
+		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
+		btrfs_set_item_key(leaf, &disk_key, slot);
+		if (slot == 0)
+			fixup_low_keys(trans, root, path, &disk_key, 1);
+	}
+
+	item = btrfs_item_nr(leaf, slot);
+	btrfs_set_item_size(leaf, item, new_size);
+	btrfs_mark_buffer_dirty(leaf);
+
+	ret = 0;
+	if (btrfs_leaf_free_space(root, leaf) < 0) {
+		btrfs_print_leaf(root, leaf);
+		BUG();
+	}
+	return ret;
+}
+
+/*
+ * make the item pointed to by the path bigger.  data_size is the number
+ * of bytes to add to its current size.
+ */
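+/*
+ * For example (illustrative): growing an item by 8 bytes shifts the data of
+ * this item and every later item 8 bytes toward the item array, lowers
+ * their offsets by 8, and then adds 8 to this item's size, leaving the new
+ * bytes at the end of the item.
+ */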
+int btrfs_extend_item(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root, struct btrfs_path *path,
+		      u32 data_size)
+{
+	int ret = 0;
+	int slot;
+	int slot_orig;
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+	u32 nritems;
+	unsigned int data_end;
+	unsigned int old_data;
+	unsigned int old_size;
+	int i;
+
+	slot_orig = path->slots[0];
+	leaf = path->nodes[0];
+
+	nritems = btrfs_header_nritems(leaf);
+	data_end = leaf_data_end(root, leaf);
+
+	if (btrfs_leaf_free_space(root, leaf) < data_size) {
+		btrfs_print_leaf(root, leaf);
+		BUG();
+	}
+	slot = path->slots[0];
+	old_data = btrfs_item_end_nr(leaf, slot);
+
+	BUG_ON(slot < 0);
+	if (slot >= nritems) {
+		btrfs_print_leaf(root, leaf);
+		printk("slot %d too large, nritems %d\n", slot, nritems);
+		BUG_ON(1);
+	}
+
+	/*
+	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
+	 */
+	/* first correct the data pointers */
+	for (i = slot; i < nritems; i++) {
+		u32 ioff;
+		item = btrfs_item_nr(leaf, i);
+
+		if (!leaf->map_token) {
+			map_extent_buffer(leaf, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&leaf->map_token, &leaf->kaddr,
+					&leaf->map_start, &leaf->map_len,
+					KM_USER1);
+		}
+		ioff = btrfs_item_offset(leaf, item);
+		btrfs_set_item_offset(leaf, item, ioff - data_size);
+	}
+
+	if (leaf->map_token) {
+		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+		leaf->map_token = NULL;
+	}
+
+	/* shift the data */
+	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+		      data_end - data_size, btrfs_leaf_data(leaf) +
+		      data_end, old_data - data_end);
+
+	data_end = old_data;
+	old_size = btrfs_item_size_nr(leaf, slot);
+	item = btrfs_item_nr(leaf, slot);
+	btrfs_set_item_size(leaf, item, old_size + data_size);
+	btrfs_mark_buffer_dirty(leaf);
+
+	ret = 0;
+	if (btrfs_leaf_free_space(root, leaf) < 0) {
+		btrfs_print_leaf(root, leaf);
+		BUG();
+	}
+	return ret;
+}
+
+/*
+ * Given a key and some data, insert items into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path,
+			    struct btrfs_key *cpu_key, u32 *data_size,
+			    int nr)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+	int ret = 0;
+	int slot;
+	int slot_orig;
+	int i;
+	u32 nritems;
+	u32 total_size = 0;
+	u32 total_data = 0;
+	unsigned int data_end;
+	struct btrfs_disk_key disk_key;
+
+	for (i = 0; i < nr; i++) {
+		total_data += data_size[i];
+	}
+
+	total_size = total_data + (nr * sizeof(struct btrfs_item));
+	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+	if (ret == 0)
+		return -EEXIST;
+	if (ret < 0)
+		goto out;
+
+	slot_orig = path->slots[0];
+	leaf = path->nodes[0];
+
+	nritems = btrfs_header_nritems(leaf);
+	data_end = leaf_data_end(root, leaf);
+
+	if (btrfs_leaf_free_space(root, leaf) < total_size) {
+		btrfs_print_leaf(root, leaf);
+		printk("not enough freespace need %u have %d\n",
+		       total_size, btrfs_leaf_free_space(root, leaf));
+		BUG();
+	}
+
+	slot = path->slots[0];
+	BUG_ON(slot < 0);
+
+	if (slot != nritems) {
+		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
+
+		if (old_data < data_end) {
+			btrfs_print_leaf(root, leaf);
+			printk("slot %d old_data %d data_end %d\n",
+			       slot, old_data, data_end);
+			BUG_ON(1);
+		}
+		/*
+		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
+		 */
+		/* first correct the data pointers */
+		WARN_ON(leaf->map_token);
+		for (i = slot; i < nritems; i++) {
+			u32 ioff;
+
+			item = btrfs_item_nr(leaf, i);
+			if (!leaf->map_token) {
+				map_extent_buffer(leaf, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&leaf->map_token, &leaf->kaddr,
+					&leaf->map_start, &leaf->map_len,
+					KM_USER1);
+			}
+
+			ioff = btrfs_item_offset(leaf, item);
+			btrfs_set_item_offset(leaf, item, ioff - total_data);
+		}
+		if (leaf->map_token) {
+			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+			leaf->map_token = NULL;
+		}
+
+		/* shift the items */
+		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+			      btrfs_item_nr_offset(slot),
+			      (nritems - slot) * sizeof(struct btrfs_item));
+
+		/* shift the data */
+		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+			      data_end - total_data, btrfs_leaf_data(leaf) +
+			      data_end, old_data - data_end);
+		data_end = old_data;
+	}
+
+	/* setup the item for the new data */
+	for (i = 0; i < nr; i++) {
+		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+		btrfs_set_item_key(leaf, &disk_key, slot + i);
+		item = btrfs_item_nr(leaf, slot + i);
+		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+		data_end -= data_size[i];
+		btrfs_set_item_size(leaf, item, data_size[i]);
+	}
+	btrfs_set_header_nritems(leaf, nritems + nr);
+	btrfs_mark_buffer_dirty(leaf);
+
+	ret = 0;
+	if (slot == 0) {
+		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+	}
+
+	if (btrfs_leaf_free_space(root, leaf) < 0) {
+		btrfs_print_leaf(root, leaf);
+		BUG();
+	}
+out:
+	return ret;
+}
+
+/*
+ * Given a key and some data, insert an item into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *cpu_key, void *data, u32
+		      data_size)
+{
+	int ret = 0;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	unsigned long ptr;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+	if (!ret) {
+		leaf = path->nodes[0];
+		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		write_extent_buffer(leaf, data, ptr, data_size);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * delete the pointer from a given node.
+ *
+ * the tree should have been previously balanced so the deletion does not
+ * empty a node.
+ */
+static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_path *path, int level, int slot)
+{
+	struct extent_buffer *parent = path->nodes[level];
+	u32 nritems;
+	int ret = 0;
+	int wret;
+
+	nritems = btrfs_header_nritems(parent);
+	if (slot != nritems - 1) {
+		memmove_extent_buffer(parent,
+			      btrfs_node_key_ptr_offset(slot),
+			      btrfs_node_key_ptr_offset(slot + 1),
+			      sizeof(struct btrfs_key_ptr) *
+			      (nritems - slot - 1));
+	}
+	nritems--;
+	btrfs_set_header_nritems(parent, nritems);
+	if (nritems == 0 && parent == root->node) {
+		BUG_ON(btrfs_header_level(root->node) != 1);
+		/* just turn the root into a leaf and break */
+		btrfs_set_header_level(root->node, 0);
+	} else if (slot == 0) {
+		struct btrfs_disk_key disk_key;
+
+		btrfs_node_key(parent, &disk_key, 0);
+		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
+		if (wret)
+			ret = wret;
+	}
+	btrfs_mark_buffer_dirty(parent);
+	return ret;
+}
+
+/*
+ * a helper function to delete the leaf pointed to by path->slots[1] and
+ * path->nodes[1].  bytenr is the node block pointer, but since the callers
+ * already know it, it is faster to have them pass it down than to
+ * read it out of the node again.
+ *
+ * This deletes the pointer in path->nodes[1] and frees the leaf
+ * block extent.  zero is returned if it all worked out, < 0 otherwise.
+ *
+ * The path must have already been setup for deleting the leaf, including
+ * all the proper balancing.  path->nodes[1] must be locked.
+ */
+noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path, u64 bytenr)
+{
+	int ret;
+	u64 root_gen = btrfs_header_generation(path->nodes[1]);
+
+	ret = del_ptr(trans, root, path, 1, path->slots[1]);
+	if (ret)
+		return ret;
+
+	ret = btrfs_free_extent(trans, root, bytenr,
+				btrfs_level_size(root, 0),
+				path->nodes[1]->start,
+				btrfs_header_owner(path->nodes[1]),
+				root_gen, 0, 1);
+	return ret;
+}
+/*
+ * delete the item at the leaf level in path.  If that empties
+ * the leaf, remove it from the tree
+ */
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		    struct btrfs_path *path, int slot, int nr)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+	int last_off;
+	int dsize = 0;
+	int ret = 0;
+	int wret;
+	int i;
+	u32 nritems;
+
+	leaf = path->nodes[0];
+	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
+
+	for (i = 0; i < nr; i++)
+		dsize += btrfs_item_size_nr(leaf, slot + i);
+
+	nritems = btrfs_header_nritems(leaf);
+
+	if (slot + nr != nritems) {
+		int data_end = leaf_data_end(root, leaf);
+
+		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
+			      data_end + dsize,
+			      btrfs_leaf_data(leaf) + data_end,
+			      last_off - data_end);
+
+		for (i = slot + nr; i < nritems; i++) {
+			u32 ioff;
+
+			item = btrfs_item_nr(leaf, i);
+			if (!leaf->map_token) {
+				map_extent_buffer(leaf, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&leaf->map_token, &leaf->kaddr,
+					&leaf->map_start, &leaf->map_len,
+					KM_USER1);
+			}
+			ioff = btrfs_item_offset(leaf, item);
+			btrfs_set_item_offset(leaf, item, ioff + dsize);
+		}
+
+		if (leaf->map_token) {
+			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+			leaf->map_token = NULL;
+		}
+
+		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
+			      btrfs_item_nr_offset(slot + nr),
+			      sizeof(struct btrfs_item) *
+			      (nritems - slot - nr));
+	}
+	btrfs_set_header_nritems(leaf, nritems - nr);
+	nritems -= nr;
+
+	/* delete the leaf if we've emptied it */
+	if (nritems == 0) {
+		if (leaf == root->node) {
+			btrfs_set_header_level(leaf, 0);
+		} else {
+			ret = btrfs_del_leaf(trans, root, path, leaf->start);
+			BUG_ON(ret);
+		}
+	} else {
+		int used = leaf_space_used(leaf, 0, nritems);
+		if (slot == 0) {
+			struct btrfs_disk_key disk_key;
+
+			btrfs_item_key(leaf, &disk_key, 0);
+			wret = fixup_low_keys(trans, root, path,
+					      &disk_key, 1);
+			if (wret)
+				ret = wret;
+		}
+
+		/* delete the leaf if it is mostly empty */
+		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+			/* push_leaf_left fixes the path.
+			 * make sure the path still points to our leaf
+			 * for possible call to del_ptr below
+			 */
+			slot = path->slots[1];
+			extent_buffer_get(leaf);
+
+			wret = push_leaf_left(trans, root, path, 1, 1);
+			if (wret < 0 && wret != -ENOSPC)
+				ret = wret;
+
+			if (path->nodes[0] == leaf &&
+			    btrfs_header_nritems(leaf)) {
+				wret = push_leaf_right(trans, root, path, 1, 1);
+				if (wret < 0 && wret != -ENOSPC)
+					ret = wret;
+			}
+
+			if (btrfs_header_nritems(leaf) == 0) {
+				path->slots[1] = slot;
+				ret = btrfs_del_leaf(trans, root, path, leaf->start);
+				BUG_ON(ret);
+				free_extent_buffer(leaf);
+			} else {
+				/* if we're still in the path, make sure
+				 * we're dirty.  Otherwise, one of the
+				 * push_leaf functions must have already
+				 * dirtied this buffer
+				 */
+				if (path->nodes[0] == leaf)
+					btrfs_mark_buffer_dirty(leaf);
+				free_extent_buffer(leaf);
+			}
+		} else {
+			btrfs_mark_buffer_dirty(leaf);
+		}
+	}
+	return ret;
+}
+
+/*
+ * search the tree again to find a leaf with lesser keys
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ *
+ * This may release the path, and so you may lose any locks held at the
+ * time you call it.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	struct btrfs_disk_key found_key;
+	int ret;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+
+	if (key.offset > 0)
+		key.offset--;
+	else if (key.type > 0)
+		key.type--;
+	else if (key.objectid > 0)
+		key.objectid--;
+	else
+		return 1;
+
+	btrfs_release_path(root, path);
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+	btrfs_item_key(path->nodes[0], &found_key, 0);
+	ret = comp_keys(&found_key, &key);
+	if (ret < 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * A helper function to walk down the tree starting at min_key, and looking
+ * for nodes or leaves that are either in cache or have a minimum
+ * transaction id.  This is used by the btree defrag code, and tree logging
+ *
+ * This does not cow, but it does stuff the starting key it finds back
+ * into min_key, so you can call btrfs_search_slot with cow=1 on the
+ * key and get a writable path.
+ *
+ * This does lock as it descends, and path->keep_locks should be set
+ * to 1 by the caller.
+ *
+ * This honors path->lowest_level to prevent descent past a given level
+ * of the tree.
+ *
+ * min_trans indicates the oldest transaction that you are interested
+ * in walking through.  Any nodes or leaves older than min_trans are
+ * skipped over (without reading them).
+ *
+ * returns zero if something useful was found, < 0 on error and 1 if there
+ * was nothing in the tree that matched the search criteria.
+ */
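+/*
+ * A rough caller sketch (illustrative only, error handling trimmed; a
+ * return of 1 means nothing matched, < 0 is an error, and min_key comes
+ * back holding the key that was found):
+ *
+ *	path->keep_locks = 1;
+ *	while (1) {
+ *		ret = btrfs_search_forward(root, &min_key, &max_key, path,
+ *					   cache_only, min_trans);
+ *		if (ret)
+ *			break;
+ *		... process path->nodes[path->lowest_level] ...
+ *		btrfs_release_path(root, path);
+ *		... step min_key forward so the next search makes progress ...
+ *	}
+ */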
+int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+			 struct btrfs_key *max_key,
+			 struct btrfs_path *path, int cache_only,
+			 u64 min_trans)
+{
+	struct extent_buffer *cur;
+	struct btrfs_key found_key;
+	int slot;
+	int sret;
+	u32 nritems;
+	int level;
+	int ret = 1;
+
+again:
+	cur = btrfs_lock_root_node(root);
+	level = btrfs_header_level(cur);
+	WARN_ON(path->nodes[level]);
+	path->nodes[level] = cur;
+	path->locks[level] = 1;
+
+	if (btrfs_header_generation(cur) < min_trans) {
+		ret = 1;
+		goto out;
+	}
+	while(1) {
+		nritems = btrfs_header_nritems(cur);
+		level = btrfs_header_level(cur);
+		sret = bin_search(cur, min_key, level, &slot);
+
+		/* at the lowest level, we're done, setup the path and exit */
+		if (level == path->lowest_level) {
+			if (slot >= nritems)
+				goto find_next_key;
+			ret = 0;
+			path->slots[level] = slot;
+			btrfs_item_key_to_cpu(cur, &found_key, slot);
+			goto out;
+		}
+		if (sret && slot > 0)
+			slot--;
+		/*
+		 * check this node pointer against the cache_only and
+		 * min_trans parameters.  If it isn't in cache or is too
+		 * old, skip to the next one.
+		 */
+		while(slot < nritems) {
+			u64 blockptr;
+			u64 gen;
+			struct extent_buffer *tmp;
+			struct btrfs_disk_key disk_key;
+
+			blockptr = btrfs_node_blockptr(cur, slot);
+			gen = btrfs_node_ptr_generation(cur, slot);
+			if (gen < min_trans) {
+				slot++;
+				continue;
+			}
+			if (!cache_only)
+				break;
+
+			if (max_key) {
+				btrfs_node_key(cur, &disk_key, slot);
+				if (comp_keys(&disk_key, max_key) >= 0) {
+					ret = 1;
+					goto out;
+				}
+			}
+
+			tmp = btrfs_find_tree_block(root, blockptr,
+					    btrfs_level_size(root, level - 1));
+
+			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+				free_extent_buffer(tmp);
+				break;
+			}
+			if (tmp)
+				free_extent_buffer(tmp);
+			slot++;
+		}
+find_next_key:
+		/*
+		 * we didn't find a candidate key in this node, walk forward
+		 * and find another one
+		 */
+		if (slot >= nritems) {
+			path->slots[level] = slot;
+			sret = btrfs_find_next_key(root, path, min_key, level,
+						  cache_only, min_trans);
+			if (sret == 0) {
+				btrfs_release_path(root, path);
+				goto again;
+			} else {
+				goto out;
+			}
+		}
+		/* save our key for returning to the caller */
+		btrfs_node_key_to_cpu(cur, &found_key, slot);
+		path->slots[level] = slot;
+		if (level == path->lowest_level) {
+			ret = 0;
+			unlock_up(path, level, 1);
+			goto out;
+		}
+		cur = read_node_slot(root, cur, slot);
+
+		btrfs_tree_lock(cur);
+		path->locks[level - 1] = 1;
+		path->nodes[level - 1] = cur;
+		unlock_up(path, level, 1);
+	}
+out:
+	if (ret == 0)
+		memcpy(min_key, &found_key, sizeof(found_key));
+	return ret;
+}
+
+/*
+ * this is similar to btrfs_next_leaf, but does not try to preserve
+ * and fixup the path.  It looks for and returns the next key in the
+ * tree based on the current path and the cache_only and min_trans
+ * parameters.
+ *
+ * 0 is returned if another key is found, < 0 if there are any errors
+ * and 1 is returned if there are no higher keys in the tree
+ *
+ * path->keep_locks should be set to 1 on the search made before
+ * calling this function.
+ */
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+			struct btrfs_key *key, int lowest_level,
+			int cache_only, u64 min_trans)
+{
+	int level = lowest_level;
+	int slot;
+	struct extent_buffer *c;
+
+	while(level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			return 1;
+
+		slot = path->slots[level] + 1;
+		c = path->nodes[level];
+next:
+		if (slot >= btrfs_header_nritems(c)) {
+			level++;
+			if (level == BTRFS_MAX_LEVEL) {
+				return 1;
+			}
+			continue;
+		}
+		if (level == 0)
+			btrfs_item_key_to_cpu(c, key, slot);
+		else {
+			u64 blockptr = btrfs_node_blockptr(c, slot);
+			u64 gen = btrfs_node_ptr_generation(c, slot);
+
+			if (cache_only) {
+				struct extent_buffer *cur;
+				cur = btrfs_find_tree_block(root, blockptr,
+					    btrfs_level_size(root, level - 1));
+				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
+					slot++;
+					if (cur)
+						free_extent_buffer(cur);
+					goto next;
+				}
+				free_extent_buffer(cur);
+			}
+			if (gen < min_trans) {
+				slot++;
+				goto next;
+			}
+			btrfs_node_key_to_cpu(c, key, slot);
+		}
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * search the tree again to find a leaf with greater keys
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
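+/*
+ * A rough sketch of walking every item in key order with this helper
+ * (illustrative only, error handling trimmed):
+ *
+ *	while (1) {
+ *		if (path->slots[0] >=
+ *		    btrfs_header_nritems(path->nodes[0])) {
+ *			ret = btrfs_next_leaf(root, path);
+ *			if (ret)
+ *				break;
+ *		}
+ *		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ *		... use the item ...
+ *		path->slots[0]++;
+ *	}
+ */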
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+	int slot;
+	int level = 1;
+	struct extent_buffer *c;
+	struct extent_buffer *next = NULL;
+	struct btrfs_key key;
+	u32 nritems;
+	int ret;
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	if (nritems == 0) {
+		return 1;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+
+	btrfs_release_path(root, path);
+	path->keep_locks = 1;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	path->keep_locks = 0;
+
+	if (ret < 0)
+		return ret;
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	/*
+	 * by releasing the path above we dropped all our locks.  A balance
+	 * could have added more items next to the key that used to be
+	 * at the very end of the block.  So, check again here and
+	 * advance the path if there are now more items available.
+	 */
+	if (nritems > 0 && path->slots[0] < nritems - 1) {
+		path->slots[0]++;
+		goto done;
+	}
+
+	while(level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			return 1;
+
+		slot = path->slots[level] + 1;
+		c = path->nodes[level];
+		if (slot >= btrfs_header_nritems(c)) {
+			level++;
+			if (level == BTRFS_MAX_LEVEL) {
+				return 1;
+			}
+			continue;
+		}
+
+		if (next) {
+			btrfs_tree_unlock(next);
+			free_extent_buffer(next);
+		}
+
+		if (level == 1 && (path->locks[1] || path->skip_locking) &&
+		    path->reada)
+			reada_for_search(root, path, level, slot, 0);
+
+		next = read_node_slot(root, c, slot);
+		if (!path->skip_locking) {
+			WARN_ON(!btrfs_tree_locked(c));
+			btrfs_tree_lock(next);
+		}
+		break;
+	}
+	path->slots[level] = slot;
+	while(1) {
+		level--;
+		c = path->nodes[level];
+		if (path->locks[level])
+			btrfs_tree_unlock(c);
+		free_extent_buffer(c);
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		if (!path->skip_locking)
+			path->locks[level] = 1;
+		if (!level)
+			break;
+		if (level == 1 && path->locks[1] && path->reada)
+			reada_for_search(root, path, level, slot, 0);
+		next = read_node_slot(root, next, 0);
+		if (!path->skip_locking) {
+			WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+			btrfs_tree_lock(next);
+		}
+	}
+done:
+	unlock_up(path, 0, 1);
+	return 0;
+}
+
+/*
+ * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
+ * searching until it gets past min_objectid or finds an item of 'type'
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid,
+			int type)
+{
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int ret;
+
+	while(1) {
+		if (path->slots[0] == 0) {
+			ret = btrfs_prev_leaf(root, path);
+			if (ret != 0)
+				return ret;
+		} else {
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (nritems == 0)
+			return 1;
+		if (path->slots[0] == nritems)
+			path->slots[0]--;
+
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.type == type)
+			return 0;
+		if (found_key.objectid < min_objectid)
+			break;
+		if (found_key.objectid == min_objectid &&
+		    found_key.type < type)
+			break;
+	}
+	return 1;
+}

+ 1891 - 0
fs/btrfs/ctree.h

@@ -0,0 +1,1891 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_CTREE__
+#define __BTRFS_CTREE__
+
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
+#include <linux/completion.h>
+#include <linux/backing-dev.h>
+#include <linux/wait.h>
+#include <asm/kmap_types.h>
+#include "extent_io.h"
+#include "extent_map.h"
+#include "async-thread.h"
+
+struct btrfs_trans_handle;
+struct btrfs_transaction;
+extern struct kmem_cache *btrfs_trans_handle_cachep;
+extern struct kmem_cache *btrfs_transaction_cachep;
+extern struct kmem_cache *btrfs_bit_radix_cachep;
+extern struct kmem_cache *btrfs_path_cachep;
+struct btrfs_ordered_sum;
+
+#define BTRFS_MAGIC "_BBRfS_M"
+
+#define BTRFS_ACL_NOT_CACHED    ((void *)-1)
+
+#ifdef CONFIG_LOCKDEP
+# define BTRFS_MAX_LEVEL 7
+#else
+# define BTRFS_MAX_LEVEL 8
+#endif
+
+/* holds pointers to all of the tree roots */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering;
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device.  The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* one per subvolume, storing files and directories */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* directory objectid inside the root tree */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* orphan objectid for tracking unlinked/truncated files */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* does write ahead logging to speed up fsyncs */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* for space balancing */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/*
+ * All files have objectids in this range.
+ */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * the device items go into the chunk tree.  The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+/*
+ * we can actually store much bigger names, but let's not confuse the rest
+ * of linux
+ */
+#define BTRFS_NAME_LEN 255
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+/* four bytes for CRC32 */
+#define BTRFS_CRC32_SIZE 4
+#define BTRFS_EMPTY_DIR_SIZE 0
+
+#define BTRFS_FT_UNKNOWN	0
+#define BTRFS_FT_REG_FILE	1
+#define BTRFS_FT_DIR		2
+#define BTRFS_FT_CHRDEV		3
+#define BTRFS_FT_BLKDEV		4
+#define BTRFS_FT_FIFO		5
+#define BTRFS_FT_SOCK		6
+#define BTRFS_FT_SYMLINK	7
+#define BTRFS_FT_XATTR		8
+#define BTRFS_FT_MAX		9
+
+/*
+ * the key defines the order in the tree, and so it also defines (optimal)
+ * block layout.  objectid corresponds to the inode number.  The type
+ * field tells us things about the object, and is a kind of stream selector.
+ * so for a given inode, keys with a type of 1 might refer to the inode
+ * data, a type of 2 may point to file data in the btree and type == 3
+ * may point to extents.
+ *
+ * offset is the starting byte offset for this key in the stream.
+ *
+ * btrfs_disk_key is in disk byte order.  struct btrfs_key is always
+ * in cpu native order.  Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+	__le64 objectid;
+	u8 type;
+	__le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+	u64 objectid;
+	u8 type;
+	u64 offset;
+} __attribute__ ((__packed__));
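+/*
+ * For illustration: keys compare as the tuple (objectid, type, offset),
+ * with offset the least significant part, so
+ * (256, 1, 0) < (256, 1, 4096) < (256, 2, 0) < (257, 1, 0).
+ */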
+
+struct btrfs_mapping_tree {
+	struct extent_map_tree map_tree;
+};
+
+#define BTRFS_UUID_SIZE 16
+struct btrfs_dev_item {
+	/* the internal btrfs device id */
+	__le64 devid;
+
+	/* size of the device */
+	__le64 total_bytes;
+
+	/* bytes used */
+	__le64 bytes_used;
+
+	/* optimal io alignment for this device */
+	__le32 io_align;
+
+	/* optimal io width for this device */
+	__le32 io_width;
+
+	/* minimal io size for this device */
+	__le32 sector_size;
+
+	/* type and info about this device */
+	__le64 type;
+
+	/* grouping information for allocation decisions */
+	__le32 dev_group;
+
+	/* seek speed 0-100 where 100 is fastest */
+	u8 seek_speed;
+
+	/* bandwidth 0-100 where 100 is fastest */
+	u8 bandwidth;
+
+	/* btrfs generated uuid for this device */
+	u8 uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+	__le64 devid;
+	__le64 offset;
+	u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+	/* size of this chunk in bytes */
+	__le64 length;
+
+	/* objectid of the root referencing this chunk */
+	__le64 owner;
+
+	__le64 stripe_len;
+	__le64 type;
+
+	/* optimal io alignment for this chunk */
+	__le32 io_align;
+
+	/* optimal io width for this chunk */
+	__le32 io_width;
+
+	/* minimal io size for this chunk */
+	__le32 sector_size;
+
+	/* 2^16 stripes is quite a lot, a second limit is the size of a single
+	 * item in the btree
+	 */
+	__le16 num_stripes;
+
+	/* sub stripes only matter for raid10 */
+	__le16 sub_stripes;
+	struct btrfs_stripe stripe;
+	/* additional stripes go here */
+} __attribute__ ((__packed__));
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+	BUG_ON(num_stripes == 0);
+	return sizeof(struct btrfs_chunk) +
+		sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
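+/*
+ * With the packed structs above (illustrative sizes: an 80 byte btrfs_chunk
+ * that already embeds one stripe, plus 32 bytes per extra btrfs_stripe), a
+ * two stripe chunk item works out to 80 + 32 = 112 bytes.
+ */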
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)
+
+/*
+ * every tree block (leaf or node) starts with this header.
+ */
+struct btrfs_header {
+	/* these first four must match the super block */
+	u8 csum[BTRFS_CSUM_SIZE];
+	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+	__le64 bytenr; /* which block this node is supposed to live in */
+	__le64 flags;
+
+	/* allowed to be different from the super from here on down */
+	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+	__le64 generation;
+	__le64 owner;
+	__le32 nritems;
+	u8 level;
+} __attribute__ ((__packed__));
+
+#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
+			        sizeof(struct btrfs_header)) / \
+			        sizeof(struct btrfs_key_ptr))
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
+#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+					sizeof(struct btrfs_item) - \
+					sizeof(struct btrfs_file_extent_item))
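+/*
+ * Rough numbers for a 4k block (illustrative, assuming a 101 byte header
+ * and 33 byte key ptrs): BTRFS_NODEPTRS_PER_BLOCK is (4096 - 101) / 33,
+ * or 121 pointers per node, and BTRFS_LEAF_DATA_SIZE is 4096 - 101 = 3995
+ * bytes of item + data space per leaf.
+ */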
+
+
+/*
+ * this is a very generous portion of the super block, giving us
+ * room to translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+#define BTRFS_LABEL_SIZE 256
+
+/*
+ * the super block basically lists the main trees of the FS.
+ * it currently lacks any block count etc etc
+ */
+struct btrfs_super_block {
+	u8 csum[BTRFS_CSUM_SIZE];
+	/* the first 4 fields must match struct btrfs_header */
+	u8 fsid[16];    /* FS specific uuid */
+	__le64 bytenr; /* this block number */
+	__le64 flags;
+
+	/* allowed to be different from the btrfs_header from here on down */
+	__le64 magic;
+	__le64 generation;
+	__le64 root;
+	__le64 chunk_root;
+	__le64 log_root;
+	__le64 total_bytes;
+	__le64 bytes_used;
+	__le64 root_dir_objectid;
+	__le64 num_devices;
+	__le32 sectorsize;
+	__le32 nodesize;
+	__le32 leafsize;
+	__le32 stripesize;
+	__le32 sys_chunk_array_size;
+	u8 root_level;
+	u8 chunk_root_level;
+	u8 log_root_level;
+	struct btrfs_dev_item dev_item;
+	char label[BTRFS_LABEL_SIZE];
+	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+} __attribute__ ((__packed__));
+
+/*
+ * A leaf is full of items. offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area)
+ */
+struct btrfs_item {
+	struct btrfs_disk_key key;
+	__le32 offset;
+	__le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+	struct btrfs_header header;
+	struct btrfs_item items[];
+} __attribute__ ((__packed__));
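+/*
+ * A small illustrative layout, assuming 3995 bytes of data area: inserting
+ * item 0 with 100 bytes of data and then item 1 with 50 gives
+ * item 0 { offset 3895, size 100 } and item 1 { offset 3845, size 50 };
+ * the item array grows forward from the header while the data area fills
+ * backward from the end of the block.
+ */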
+
+/*
+ * all non-leaf blocks are nodes, they hold only keys and pointers to
+ * other blocks
+ */
+struct btrfs_key_ptr {
+	struct btrfs_disk_key key;
+	__le64 blockptr;
+	__le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+	struct btrfs_header header;
+	struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+/*
+ * btrfs_paths remember the path taken from the root down to the leaf.
+ * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
+ * to any other levels that are present.
+ *
+ * The slots array records the index of the item or block pointer
+ * used while walking the tree.
+ */
+struct btrfs_path {
+	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+	int slots[BTRFS_MAX_LEVEL];
+	/* if there is real range locking, this locks field will change */
+	int locks[BTRFS_MAX_LEVEL];
+	int reada;
+	/* keep some upper locks as we walk down */
+	int keep_locks;
+	int skip_locking;
+	int lowest_level;
+};
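+/*
+ * A rough usage sketch (illustrative only, error handling trimmed):
+ *
+ *	struct btrfs_path *path = btrfs_alloc_path();
+ *
+ *	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ *	if (ret == 0) {
+ *		leaf = path->nodes[0];
+ *		slot = path->slots[0];
+ *		... read the item at (leaf, slot) ...
+ *	}
+ *	btrfs_free_path(path);
+ */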
+
+/*
+ * items in the extent btree are used to record the objectid of the
+ * owner of the block and the number of references
+ */
+struct btrfs_extent_item {
+	__le32 refs;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_ref {
+	__le64 root;
+	__le64 generation;
+	__le64 objectid;
+	__le32 num_refs;
+} __attribute__ ((__packed__));
+
+/* dev extents record free space on individual devices.  The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent.  The chunk tree uuid field is a way to double check the owner
+ */
+struct btrfs_dev_extent {
+	__le64 chunk_tree;
+	__le64 chunk_objectid;
+	__le64 chunk_offset;
+	__le64 length;
+	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+	__le64 index;
+	__le16 name_len;
+	/* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+	__le64 sec;
+	__le32 nsec;
+} __attribute__ ((__packed__));
+
+/*
+ * there is no padding here on purpose.  If you want to extend the inode,
+ * make a new item type
+ */
+struct btrfs_inode_item {
+	/* nfs style generation number */
+	__le64 generation;
+	/* transid that last touched this inode */
+	__le64 transid;
+	__le64 size;
+	__le64 nbytes;
+	__le64 block_group;
+	__le32 nlink;
+	__le32 uid;
+	__le32 gid;
+	__le32 mode;
+	__le64 rdev;
+	__le16 flags;
+	__le16 compat_flags;
+	struct btrfs_timespec atime;
+	struct btrfs_timespec ctime;
+	struct btrfs_timespec mtime;
+	struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+	__le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+	struct btrfs_disk_key location;
+	__le64 transid;
+	__le16 data_len;
+	__le16 name_len;
+	u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_root_item {
+	struct btrfs_inode_item inode;
+	__le64 root_dirid;
+	__le64 bytenr;
+	__le64 byte_limit;
+	__le64 bytes_used;
+	__le32 flags;
+	__le32 refs;
+	struct btrfs_disk_key drop_progress;
+	u8 drop_level;
+	u8 level;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FILE_EXTENT_REG 0
+#define BTRFS_FILE_EXTENT_INLINE 1
+
+struct btrfs_file_extent_item {
+	__le64 generation;
+	u8 type;
+	/*
+	 * disk space consumed by the extent, checksum blocks are included
+	 * in these numbers
+	 */
+	__le64 disk_bytenr;
+	__le64 disk_num_bytes;
+	/*
+	 * the logical offset in file blocks (no csums)
+	 * this extent record is for.  This allows a file extent to point
+	 * into the middle of an existing extent on disk, sharing it
+	 * between two snapshots (useful if some bytes in the middle of the
+	 * extent have changed); see the example below the struct
+	 */
+	__le64 offset;
+	/*
+	 * the logical number of file blocks (no csums included)
+	 */
+	__le64 num_bytes;
+} __attribute__ ((__packed__));
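+/*
+ * For illustration (numbers made up): two snapshots can both reference a
+ * 1MB on-disk extent at disk_bytenr D.  If one of them only needs the
+ * middle 256k, it stores disk_bytenr = D, disk_num_bytes = 1MB,
+ * offset = 512k and num_bytes = 256k, sharing the extent instead of
+ * copying it.
+ */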
+
+struct btrfs_csum_item {
+	u8 csum;
+} __attribute__ ((__packed__));
+
+/* different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA     (1 << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
+#define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
+#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
+#define BTRFS_BLOCK_GROUP_DUP	   (1 << 5)
+#define BTRFS_BLOCK_GROUP_RAID10   (1 << 6)
+
+struct btrfs_block_group_item {
+	__le64 used;
+	__le64 chunk_objectid;
+	__le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_space_info {
+	u64 flags;
+	u64 total_bytes;
+	u64 bytes_used;
+	u64 bytes_pinned;
+	u64 bytes_reserved;
+	int full;
+	int force_alloc;
+	struct list_head list;
+
+	/* for block groups in our same type */
+	struct list_head block_groups;
+	spinlock_t lock;
+};
+
+struct btrfs_free_space {
+	struct rb_node bytes_index;
+	struct rb_node offset_index;
+	u64 offset;
+	u64 bytes;
+};
+
+struct btrfs_block_group_cache {
+	struct btrfs_key key;
+	struct btrfs_block_group_item item;
+	spinlock_t lock;
+	u64 pinned;
+	u64 reserved;
+	u64 flags;
+	int cached;
+	int ro;
+	int dirty;
+
+	struct btrfs_space_info *space_info;
+
+	/* free space cache stuff */
+	struct rb_root free_space_bytes;
+	struct rb_root free_space_offset;
+
+	/* block group cache stuff */
+	struct rb_node cache_node;
+
+	/* for block groups in the same raid type */
+	struct list_head list;
+};
+
+struct btrfs_leaf_ref_tree {
+	struct rb_root root;
+	struct list_head list;
+	spinlock_t lock;
+};
+
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+	u8 fsid[BTRFS_FSID_SIZE];
+	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+	struct btrfs_root *extent_root;
+	struct btrfs_root *tree_root;
+	struct btrfs_root *chunk_root;
+	struct btrfs_root *dev_root;
+
+	/* the log root tree is a directory of all the other log roots */
+	struct btrfs_root *log_root_tree;
+	struct radix_tree_root fs_roots_radix;
+
+	/* block group cache stuff */
+	spinlock_t block_group_cache_lock;
+	struct rb_root block_group_cache_tree;
+
+	struct extent_io_tree pinned_extents;
+	struct extent_io_tree pending_del;
+	struct extent_io_tree extent_ins;
+
+	/* logical->physical extent mapping */
+	struct btrfs_mapping_tree mapping_tree;
+
+	u64 generation;
+	u64 last_trans_committed;
+	u64 last_trans_new_blockgroup;
+	u64 open_ioctl_trans;
+	unsigned long mount_opt;
+	u64 max_extent;
+	u64 max_inline;
+	u64 alloc_start;
+	struct btrfs_transaction *running_transaction;
+	wait_queue_head_t transaction_throttle;
+	wait_queue_head_t transaction_wait;
+	wait_queue_head_t async_submit_wait;
+
+	wait_queue_head_t tree_log_wait;
+
+	struct btrfs_super_block super_copy;
+	struct btrfs_super_block super_for_commit;
+	struct block_device *__bdev;
+	struct super_block *sb;
+	struct inode *btree_inode;
+	struct backing_dev_info bdi;
+	spinlock_t hash_lock;
+	struct mutex trans_mutex;
+	struct mutex tree_log_mutex;
+	struct mutex transaction_kthread_mutex;
+	struct mutex cleaner_mutex;
+	struct mutex alloc_mutex;
+	struct mutex chunk_mutex;
+	struct mutex drop_mutex;
+	struct mutex volume_mutex;
+	struct mutex tree_reloc_mutex;
+	struct list_head trans_list;
+	struct list_head hashers;
+	struct list_head dead_roots;
+
+	atomic_t nr_async_submits;
+	atomic_t async_submit_draining;
+	atomic_t nr_async_bios;
+	atomic_t tree_log_writers;
+	atomic_t tree_log_commit;
+	unsigned long tree_log_batch;
+	u64 tree_log_transid;
+
+	/*
+	 * this is used by the balancing code to wait for all the pending
+	 * ordered extents
+	 */
+	spinlock_t ordered_extent_lock;
+	struct list_head ordered_extents;
+	struct list_head delalloc_inodes;
+
+	/*
+	 * there is a pool of worker threads for checksumming during writes
+	 * and a pool for checksumming after reads.  This is because readers
+	 * can run with FS locks held, and the writers may be waiting for
+	 * those locks.  We don't want ordering in the pending list to cause
+	 * deadlocks, and so the two are serviced separately.
+	 *
+	 * A third pool does submit_bio to avoid deadlocking with the other
+	 * two
+	 */
+	struct btrfs_workers workers;
+	struct btrfs_workers endio_workers;
+	struct btrfs_workers endio_write_workers;
+	struct btrfs_workers submit_workers;
+	/*
+	 * fixup workers take dirty pages that didn't properly go through
+	 * the cow mechanism and make them safe to write.  It happens
+	 * for the sys_munmap function call path
+	 */
+	struct btrfs_workers fixup_workers;
+	struct task_struct *transaction_kthread;
+	struct task_struct *cleaner_kthread;
+	int thread_pool_size;
+
+	/* tree relocation related fields */
+	struct extent_io_tree reloc_mapping_tree;
+	struct list_head dead_reloc_roots;
+	struct btrfs_leaf_ref_tree reloc_ref_tree;
+	struct btrfs_leaf_ref_tree shared_ref_tree;
+
+	struct kobject super_kobj;
+	struct completion kobj_unregister;
+	int do_barriers;
+	int closing;
+	int log_root_recovering;
+	atomic_t throttles;
+	atomic_t throttle_gen;
+
+	u64 total_pinned;
+	struct list_head dirty_cowonly_roots;
+
+	struct btrfs_fs_devices *fs_devices;
+	struct list_head space_info;
+	spinlock_t delalloc_lock;
+	spinlock_t new_trans_lock;
+	u64 delalloc_bytes;
+	u64 last_alloc;
+	u64 last_data_alloc;
+
+	spinlock_t ref_cache_lock;
+	u64 total_ref_cache_size;
+
+	u64 avail_data_alloc_bits;
+	u64 avail_metadata_alloc_bits;
+	u64 avail_system_alloc_bits;
+	u64 data_alloc_profile;
+	u64 metadata_alloc_profile;
+	u64 system_alloc_profile;
+
+	void *bdev_holder;
+};
+
+/*
+ * in ram representation of the tree.  extent_root is used for all allocations
+ * and for the extent tree itself.
+ */
+struct btrfs_dirty_root;
+struct btrfs_root {
+	struct extent_buffer *node;
+
+	/* the node lock is held while changing the node pointer */
+	spinlock_t node_lock;
+
+	struct extent_buffer *commit_root;
+	struct btrfs_leaf_ref_tree *ref_tree;
+	struct btrfs_leaf_ref_tree ref_tree_struct;
+	struct btrfs_dirty_root *dirty_root;
+	struct btrfs_root *log_root;
+	struct btrfs_root *reloc_root;
+
+	struct btrfs_root_item root_item;
+	struct btrfs_key root_key;
+	struct btrfs_fs_info *fs_info;
+	struct inode *inode;
+	struct extent_io_tree dirty_log_pages;
+
+	struct kobject root_kobj;
+	struct completion kobj_unregister;
+	struct mutex objectid_mutex;
+	struct mutex log_mutex;
+
+	u64 objectid;
+	u64 last_trans;
+
+	/* data allocations are done in sectorsize units */
+	u32 sectorsize;
+
+	/* node allocations are done in nodesize units */
+	u32 nodesize;
+
+	/* leaf allocations are done in leafsize units */
+	u32 leafsize;
+
+	u32 stripesize;
+
+	u32 type;
+	u64 highest_inode;
+	u64 last_inode_alloc;
+	int ref_cows;
+	int track_dirty;
+	u64 defrag_trans_start;
+	struct btrfs_key defrag_progress;
+	struct btrfs_key defrag_max;
+	int defrag_running;
+	int defrag_level;
+	char *name;
+	int in_sysfs;
+
+	/* the dirty list is only used by non-reference counted roots */
+	struct list_head dirty_list;
+
+	spinlock_t list_lock;
+	struct list_head dead_list;
+	struct list_head orphan_list;
+};
+
+/*
+ * inode items have the data typically returned from stat and store other
+ * info about object characteristics.  There is one for every file and dir in
+ * the FS.
+ */
+#define BTRFS_INODE_ITEM_KEY		1
+#define BTRFS_INODE_REF_KEY		2
+#define BTRFS_XATTR_ITEM_KEY		8
+#define BTRFS_ORPHAN_ITEM_KEY		9
+/* reserve 2-15 close to the inode for later flexibility */
+
+/*
+ * dir items are the name -> inode pointers in a directory.  There is one
+ * for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY  14
+#define BTRFS_DIR_LOG_INDEX_KEY 15
+#define BTRFS_DIR_ITEM_KEY	16
+#define BTRFS_DIR_INDEX_KEY	17
+/*
+ * extent data is for file data
+ */
+#define BTRFS_EXTENT_DATA_KEY	18
+/*
+ * csum items have the checksums for data in the extents
+ */
+#define BTRFS_CSUM_ITEM_KEY	19
+
+/* reserve 21-31 for other file/dir stuff */
+
+/*
+ * root items point to tree roots.  They are typically in the root
+ * tree, which is used by the super block to find all the other trees.
+ */
+#define BTRFS_ROOT_ITEM_KEY	32
+/*
+ * extent items are in the extent map tree.  These record which blocks
+ * are used, and how many references there are to each block
+ */
+#define BTRFS_EXTENT_ITEM_KEY	33
+#define BTRFS_EXTENT_REF_KEY	34
+
+/*
+ * block groups give us hints into the extent allocation trees: which
+ * blocks are free, and so on.
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 50
+
+#define BTRFS_DEV_EXTENT_KEY	75
+#define BTRFS_DEV_ITEM_KEY	76
+#define BTRFS_CHUNK_ITEM_KEY	77
+
+/*
+ * string items are for debugging.  They just store a short string of
+ * data in the FS
+ */
+#define BTRFS_STRING_ITEM_KEY	253
+
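+/*
+ * For orientation: keys are (objectid, type, offset) triples.  As a rough
+ * example, a regular file's items usually share the inode number as the
+ * objectid: (ino, BTRFS_INODE_ITEM_KEY, 0) for the inode itself and
+ * (ino, BTRFS_EXTENT_DATA_KEY, file_offset) for each file extent.
+ */
+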
+#define BTRFS_MOUNT_NODATASUM		(1 << 0)
+#define BTRFS_MOUNT_NODATACOW		(1 << 1)
+#define BTRFS_MOUNT_NOBARRIER		(1 << 2)
+#define BTRFS_MOUNT_SSD			(1 << 3)
+#define BTRFS_MOUNT_DEGRADED		(1 << 4)
+
+#define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
+#define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
+#define btrfs_test_opt(root, opt)	((root)->fs_info->mount_opt & \
+					 BTRFS_MOUNT_##opt)
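+
+/*
+ * Example usage (illustrative): btrfs_set_opt(fs_info->mount_opt, SSD)
+ * turns the ssd mount option on, and btrfs_test_opt(root, SSD) then
+ * evaluates non-zero for any root of that filesystem.
+ */
+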
+/*
+ * Inode flags
+ */
+#define BTRFS_INODE_NODATASUM		(1 << 0)
+#define BTRFS_INODE_NODATACOW		(1 << 1)
+#define BTRFS_INODE_READONLY		(1 << 2)
+#define btrfs_clear_flag(inode, flag)	(BTRFS_I(inode)->flags &= \
+					 ~BTRFS_INODE_##flag)
+#define btrfs_set_flag(inode, flag)	(BTRFS_I(inode)->flags |= \
+					 BTRFS_INODE_##flag)
+#define btrfs_test_flag(inode, flag)	(BTRFS_I(inode)->flags & \
+					 BTRFS_INODE_##flag)
+/* some macros to generate set/get funcs for the struct fields.  This
+ * assumes there is a lefoo_to_cpu for every type, so let's make a simple
+ * one for u8 (an illustrative expansion is sketched below, after the
+ * macro definitions):
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define read_eb_member(eb, ptr, type, member, result) (			\
+	read_extent_buffer(eb, (char *)(result),			\
+			   ((unsigned long)(ptr)) +			\
+			    offsetof(type, member),			\
+			   sizeof(((type *)0)->member)))
+
+#define write_eb_member(eb, ptr, type, member, result) (		\
+	write_extent_buffer(eb, (char *)(result),			\
+			   ((unsigned long)(ptr)) +			\
+			    offsetof(type, member),			\
+			   sizeof(((type *)0)->member)))
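+
+/*
+ * read_eb_member/write_eb_member copy one struct member between the extent
+ * buffer pages and ordinary memory; 'ptr' is the byte offset of the
+ * containing struct inside the buffer.  btrfs_item_key() further down, for
+ * example, uses read_eb_member to pull the on-disk key out of an item.
+ */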
+
+#ifndef BTRFS_SETGET_FUNCS
+#define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
+u##bits btrfs_##name(struct extent_buffer *eb, type *s);		\
+void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
+#endif
+
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
+static inline u##bits btrfs_##name(struct extent_buffer *eb)		\
+{									\
+	type *p = kmap_atomic(eb->first_page, KM_USER0);		\
+	u##bits res = le##bits##_to_cpu(p->member);			\
+	kunmap_atomic(p, KM_USER0);					\
+	return res;							\
+}									\
+static inline void btrfs_set_##name(struct extent_buffer *eb,		\
+				    u##bits val)			\
+{									\
+	type *p = kmap_atomic(eb->first_page, KM_USER0);		\
+	p->member = cpu_to_le##bits(val);				\
+	kunmap_atomic(p, KM_USER0);					\
+}
+
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)		\
+static inline u##bits btrfs_##name(type *s)				\
+{									\
+	return le##bits##_to_cpu(s->member);				\
+}									\
+static inline void btrfs_set_##name(type *s, u##bits val)		\
+{									\
+	s->member = cpu_to_le##bits(val);				\
+}
+
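+/*
+ * As a rough illustration, BTRFS_SETGET_FUNCS(device_type, struct
+ * btrfs_dev_item, type, 64) below declares
+ *
+ *	u64 btrfs_device_type(struct extent_buffer *eb, struct btrfs_dev_item *s);
+ *	void btrfs_set_device_type(struct extent_buffer *eb,
+ *				   struct btrfs_dev_item *s, u64 val);
+ *
+ * which read and write the little-endian 'type' member in place inside the
+ * extent buffer.  The bodies behind these declarations are generated
+ * elsewhere (struct-funcs.c) by redefining BTRFS_SETGET_FUNCS.
+ */
+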
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+			 total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+			 bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+			 io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+			 io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+			 sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+			 dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+			 seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+			 bandwidth, 8);
+
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+	return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+}
+
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+	return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+			 stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+			 io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+			 io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+			 sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+			 num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+			 sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+						   int nr)
+{
+	unsigned long offset = (unsigned long)c;
+	offset += offsetof(struct btrfs_chunk, stripe);
+	offset += nr * sizeof(struct btrfs_stripe);
+	return (struct btrfs_stripe *)offset;
+}
+
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+	return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
+}
+
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+					 struct btrfs_chunk *c, int nr)
+{
+	return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+					     struct btrfs_chunk *c, int nr,
+					     u64 val)
+{
+	btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+					 struct btrfs_chunk *c, int nr)
+{
+	return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+					     struct btrfs_chunk *c, int nr,
+					     u64 val)
+{
+	btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+/* struct btrfs_block_group_item */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+			 used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+			 used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+			struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+		   struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+		   struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+			struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_inode_ref */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_item */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16);
+BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item,
+		   compat_flags, 16);
+
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, atime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, mtime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, ctime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+	unsigned long ptr = (unsigned long)inode_item;
+	ptr += offsetof(struct btrfs_inode_item, otime);
+	return (struct btrfs_timespec *)ptr;
+}
+
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+		   chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+		   chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+		   chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+	unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+	return (u8 *)((unsigned long)dev + ptr);
+}
+
+/* struct btrfs_extent_ref */
+BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
+
+BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
+			 generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
+			 objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
+			 num_refs, 32);
+
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
+			 refs, 32);
+
+/* struct btrfs_node */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+					   int nr, u64 val)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+						 int nr, u64 val)
+{
+	unsigned long ptr;
+	ptr = offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+	btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+	return offsetof(struct btrfs_node, ptrs) +
+		sizeof(struct btrfs_key_ptr) * nr;
+}
+
+void btrfs_node_key(struct extent_buffer *eb,
+		    struct btrfs_disk_key *disk_key, int nr);
+
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+				      struct btrfs_disk_key *disk_key, int nr)
+{
+	unsigned long ptr;
+	ptr = btrfs_node_key_ptr_offset(nr);
+	write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+		       struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+	return offsetof(struct btrfs_leaf, items) +
+		sizeof(struct btrfs_item) * nr;
+}
+
+static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
+					       int nr)
+{
+	return (struct btrfs_item *)btrfs_item_nr_offset(nr);
+}
+
+static inline u32 btrfs_item_end(struct extent_buffer *eb,
+				 struct btrfs_item *item)
+{
+	return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+}
+
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+	return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+{
+	return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+{
+	return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
+}
+
+static inline void btrfs_item_key(struct extent_buffer *eb,
+			   struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(eb, nr);
+	read_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+			       struct btrfs_disk_key *disk_key, int nr)
+{
+	struct btrfs_item *item = btrfs_item_nr(eb, nr);
+	write_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
+
+/* struct btrfs_dir_item */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
+
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+				      struct btrfs_dir_item *item,
+				      struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+					  struct btrfs_dir_item *item,
+					  struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+/* struct btrfs_disk_key */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+			 objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+					 struct btrfs_disk_key *disk)
+{
+	cpu->offset = le64_to_cpu(disk->offset);
+	cpu->type = disk->type;
+	cpu->objectid = le64_to_cpu(disk->objectid);
+}
+
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+					 struct btrfs_key *cpu)
+{
+	disk->offset = cpu_to_le64(cpu->offset);
+	disk->type = cpu->type;
+	disk->objectid = cpu_to_le64(cpu->objectid);
+}
+
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+				  struct btrfs_key *key, int nr)
+{
+	struct btrfs_disk_key disk_key;
+	btrfs_node_key(eb, &disk_key, nr);
+	btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+				  struct btrfs_key *key, int nr)
+{
+	struct btrfs_disk_key disk_key;
+	btrfs_item_key(eb, &disk_key, nr);
+	btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+				      struct btrfs_dir_item *item,
+				      struct btrfs_key *key)
+{
+	struct btrfs_disk_key disk_key;
+	btrfs_dir_item_key(eb, item, &disk_key);
+	btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline u8 btrfs_key_type(struct btrfs_key *key)
+{
+	return key->type;
+}
+
+static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val)
+{
+	key->type = val;
+}
+
+/* struct btrfs_header */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+			  generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	return (btrfs_header_flags(eb) & flag) == flag;
+}
+
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 flags = btrfs_header_flags(eb);
+	btrfs_set_header_flags(eb, flags | flag);
+	return (flags & flag) == flag;
+}
+
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+	u64 flags = btrfs_header_flags(eb);
+	btrfs_set_header_flags(eb, flags & ~flag);
+	return (flags & flag) == flag;
+}
+
+static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
+{
+	unsigned long ptr = offsetof(struct btrfs_header, fsid);
+	return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+	unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
+	return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_super_fsid(struct extent_buffer *eb)
+{
+	unsigned long ptr = offsetof(struct btrfs_super_block, fsid);
+	return (u8 *)ptr;
+}
+
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+	unsigned long ptr = offsetof(struct btrfs_header, csum);
+	return (u8 *)ptr;
+}
+
+static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb)
+{
+	return NULL;
+}
+
+static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb)
+{
+	return NULL;
+}
+
+static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb)
+{
+	return NULL;
+}
+
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+	return (btrfs_header_level(eb) == 0);
+}
+
+/* struct btrfs_root_item */
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+
+/* struct btrfs_super_block */
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+			 generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+			 struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+			 root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+			 chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+			 chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+			 log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+			 log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+			 total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+			 bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+			 sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+			 nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block,
+			 leafsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+			 stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+			 root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+			 num_devices, 64);
+
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+	return offsetof(struct btrfs_leaf, items);
+}
+
+/* struct btrfs_file_extent_item */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+static inline unsigned long btrfs_file_extent_inline_start(struct
+						   btrfs_file_extent_item *e)
+{
+	unsigned long offset = (unsigned long)e;
+	offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	return offset;
+}
+
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+	return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
+}
+
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+					       struct btrfs_item *e)
+{
+	unsigned long offset;
+	offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
+	return btrfs_item_size(eb, e) - offset;
+}
+
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+		   disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+		   disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+		  offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+		   num_bytes, 64);
+
+static inline struct btrfs_root *btrfs_sb(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline int btrfs_set_root_name(struct btrfs_root *root,
+				      const char *name, int len)
+{
+	/* if we already have a name just free it */
+	if (root->name)
+		kfree(root->name);
+
+	root->name = kmalloc(len+1, GFP_KERNEL);
+	if (!root->name)
+		return -ENOMEM;
+
+	memcpy(root->name, name, len);
+	root->name[len] = '\0';
+
+	return 0;
+}
+
+static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
+{
+	if (level == 0)
+		return root->leafsize;
+	return root->nodesize;
+}
+
+/* helper function to cast into the data area of the leaf. */
+#define btrfs_item_ptr(leaf, slot, type) \
+	((type *)(btrfs_leaf_data(leaf) + \
+	btrfs_item_offset_nr(leaf, slot)))
+
+#define btrfs_item_ptr_offset(leaf, slot) \
+	((unsigned long)(btrfs_leaf_data(leaf) + \
+	btrfs_item_offset_nr(leaf, slot)))
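+
+/*
+ * Typical usage (illustrative), as in dir-item.c further down:
+ *
+ *	struct btrfs_dir_item *di;
+ *	di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+ *	name_len = btrfs_dir_name_len(leaf, di);
+ *
+ * the value is really an offset into the leaf's extent buffer dressed up as
+ * a typed pointer, so it is passed to the accessor helpers along with the
+ * leaf rather than dereferenced directly.
+ */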
+
+static inline struct dentry *fdentry(struct file *file)
+{
+	return file->f_path.dentry;
+}
+
+/* extent-tree.c */
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, u64 bytenr,
+			    u64 num_bytes, u32 *refs);
+int btrfs_update_pinned_extents(struct btrfs_root *root,
+				u64 bytenr, u64 num, int pin);
+int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_key *key, u64 bytenr);
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root);
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
+struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
+							 btrfs_fs_info *info,
+							 u64 bytenr);
+struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
+						 struct btrfs_block_group_cache
+						 *hint, u64 search_start,
+						 int data, int owner);
+struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     u32 blocksize, u64 parent,
+					     u64 root_objectid,
+					     u64 ref_generation,
+					     int level,
+					     u64 hint,
+					     u64 empty_size);
+struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+					    struct btrfs_root *root,
+					    u64 bytenr, u32 blocksize);
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root,
+		       u64 num_bytes, u64 parent, u64 min_bytes,
+		       u64 root_objectid, u64 ref_generation,
+		       u64 owner, u64 empty_size, u64 hint_byte,
+		       u64 search_end, struct btrfs_key *ins, u64 data);
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, u64 parent,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, struct btrfs_key *ins);
+int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, u64 parent,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, struct btrfs_key *ins);
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  u64 num_bytes, u64 min_alloc_size,
+				  u64 empty_size, u64 hint_byte,
+				  u64 search_end, struct btrfs_key *ins,
+				  u64 data);
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
+		  u32 *nr_extents);
+int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		    struct extent_buffer *buf, u32 nr_extents);
+int btrfs_update_ref(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root, struct extent_buffer *orig_buf,
+		     struct extent_buffer *buf, int start_slot, int nr);
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root,
+		      u64 bytenr, u64 num_bytes, u64 parent,
+		      u64 root_objectid, u64 ref_generation,
+		      u64 owner_objectid, int pin);
+int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct extent_io_tree *unpin);
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 bytenr, u64 num_bytes, u64 parent,
+			 u64 root_objectid, u64 ref_generation,
+			 u64 owner_objectid);
+int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, u64 bytenr,
+			    u64 orig_parent, u64 parent,
+			    u64 root_objectid, u64 ref_generation,
+			    u64 owner_objectid);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root);
+int btrfs_free_block_groups(struct btrfs_fs_info *info);
+int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, u64 bytes_used,
+			   u64 type, u64 chunk_objectid, u64 chunk_offset,
+			   u64 size);
+int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 group_start);
+int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
+int btrfs_free_reloc_root(struct btrfs_root *root);
+int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
+int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
+			    u64 num_bytes, u64 new_bytenr);
+int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
+			    u64 num_bytes, u64 *new_bytenr);
+void btrfs_free_reloc_mappings(struct btrfs_root *root);
+int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct extent_buffer *buf, u64 orig_start);
+int btrfs_add_dead_reloc_root(struct btrfs_root *root);
+int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
+/* ctree.c */
+int btrfs_previous_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid,
+			int type);
+int btrfs_merge_path(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_key *node_keys,
+		     u64 *nodes, int lowest_level);
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, struct btrfs_path *path,
+			    struct btrfs_key *new_key);
+struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
+int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+			struct btrfs_key *key, int lowest_level,
+			int cache_only, u64 min_trans);
+int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
+			 struct btrfs_key *max_key,
+			 struct btrfs_path *path, int cache_only,
+			 u64 min_trans);
+int btrfs_cow_block(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct extent_buffer *buf,
+		    struct extent_buffer *parent, int parent_slot,
+		    struct extent_buffer **cow_ret, u64 prealloc_dest);
+int btrfs_copy_root(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root,
+		      struct extent_buffer *buf,
+		      struct extent_buffer **cow_ret, u64 new_root_objectid);
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, u32 data_size);
+int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root,
+			struct btrfs_path *path,
+			u32 new_size, int from_end);
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_path *p, int
+		      ins_len, int cow);
+int btrfs_realloc_node(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct extent_buffer *parent,
+		       int start_slot, int cache_only, u64 *last_ret,
+		       struct btrfs_key *progress);
+void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+void btrfs_init_path(struct btrfs_path *p);
+int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_path *path, int slot, int nr);
+int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path, u64 bytenr);
+static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path)
+{
+	return btrfs_del_items(trans, root, path, path->slots[0], 1);
+}
+
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_path *path,
+			     struct btrfs_key *cpu_key, u32 *data_size, int nr);
+
+static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  struct btrfs_key *key,
+					  u32 data_size)
+{
+	return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
+}
+
+int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
+			*root);
+/* root-item.c */
+int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item);
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item);
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
+			 btrfs_root_item *item, struct btrfs_key *key);
+int btrfs_search_root(struct btrfs_root *root, u64 search_start,
+		      u64 *found_objectid);
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
+			  struct btrfs_root *latest_root);
+/* dir-item.c */
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, const char *name, int name_len, u64 dir,
+			  struct btrfs_key *location, u8 type, u64 index);
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     struct btrfs_path *path, u64 dir,
+					     const char *name, int name_len,
+					     int mod);
+struct btrfs_dir_item *
+btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path, u64 dir,
+			    u64 objectid, const char *name, int name_len,
+			    int mod);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      const char *name, int name_len);
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      struct btrfs_dir_item *di);
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, const char *name,
+			    u16 name_len, const void *data, u16 data_len,
+			    u64 dir);
+struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path, u64 dir,
+					  const char *name, u16 name_len,
+					  int mod);
+
+/* orphan.c */
+int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 offset);
+int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, u64 offset);
+
+/* inode-map.c */
+int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *fs_root,
+			     u64 dirid, u64 *objectid);
+int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
+
+/* inode-item.c */
+int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   const char *name, int name_len,
+			   u64 inode_objectid, u64 ref_objectid, u64 index);
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   const char *name, int name_len,
+			   u64 inode_objectid, u64 ref_objectid, u64 *index);
+int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_path *path, u64 objectid);
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+		       *root, struct btrfs_path *path,
+		       struct btrfs_key *location, int mod);
+
+/* file-item.c */
+int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
+			  struct bio *bio);
+int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       u64 objectid, u64 pos, u64 disk_offset,
+			       u64 disk_num_bytes,
+			     u64 num_bytes, u64 offset);
+int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_path *path, u64 objectid,
+			     u64 bytenr, int mod);
+int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct inode *inode,
+			   struct btrfs_ordered_sum *sums);
+int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
+		       struct bio *bio);
+struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 objectid, u64 offset,
+					  int cow);
+int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, struct btrfs_path *path,
+			u64 isize);
+/* inode.c */
+
+/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
+#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
+#define ClearPageChecked ClearPageFsMisc
+#define SetPageChecked SetPageFsMisc
+#define PageChecked PageFsMisc
+#endif
+
+int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root,
+		       struct inode *dir, struct inode *inode,
+		       const char *name, int name_len);
+int btrfs_add_link(struct btrfs_trans_handle *trans,
+		   struct inode *parent_inode, struct inode *inode,
+		   const char *name, int name_len, int add_backref, u64 index);
+int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct inode *inode, u64 new_size,
+			       u32 min_type);
+
+int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
+int btrfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc);
+int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
+		struct btrfs_trans_handle *trans, u64 new_dirid,
+		struct btrfs_block_group_cache *block_group);
+
+void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
+				  int namelen);
+
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
+			 size_t size, struct bio *bio);
+
+unsigned long btrfs_force_ra(struct address_space *mapping,
+			      struct file_ra_state *ra, struct file *file,
+			      pgoff_t offset, pgoff_t last_index);
+int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
+			   int for_del);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_readpage(struct file *file, struct page *page);
+void btrfs_delete_inode(struct inode *inode);
+void btrfs_put_inode(struct inode *inode);
+void btrfs_read_locked_inode(struct inode *inode);
+int btrfs_write_inode(struct inode *inode, int wait);
+void btrfs_dirty_inode(struct inode *inode);
+struct inode *btrfs_alloc_inode(struct super_block *sb);
+void btrfs_destroy_inode(struct inode *inode);
+int btrfs_init_cachep(void);
+void btrfs_destroy_cachep(void);
+long btrfs_ioctl_trans_end(struct file *file);
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+			    struct btrfs_root *root, int wait);
+struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
+				struct btrfs_root *root);
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+			 struct btrfs_root *root, int *is_new);
+int btrfs_commit_write(struct file *file, struct page *page,
+		       unsigned from, unsigned to);
+struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
+				    size_t page_offset, u64 start, u64 end,
+				    int create);
+int btrfs_update_inode(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct inode *inode);
+int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
+int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
+void btrfs_orphan_cleanup(struct btrfs_root *root);
+
+/* ioctl.c */
+long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/* file.c */
+int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+			    int skip_pinned);
+int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
+extern struct file_operations btrfs_file_operations;
+int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct inode *inode,
+		       u64 start, u64 end, u64 inline_limit, u64 *hint_block);
+int btrfs_release_file(struct inode *inode, struct file *file);
+
+/* tree-defrag.c */
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, int cache_only);
+
+/* sysfs.c */
+int btrfs_init_sysfs(void);
+void btrfs_exit_sysfs(void);
+int btrfs_sysfs_add_super(struct btrfs_fs_info *fs);
+int btrfs_sysfs_add_root(struct btrfs_root *root);
+void btrfs_sysfs_del_root(struct btrfs_root *root);
+void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
+
+/* xattr.c */
+ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+
+/* super.c */
+u64 btrfs_parse_size(char *str);
+int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_sync_fs(struct super_block *sb, int wait);
+
+/* acl.c */
+int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_init_acl(struct inode *inode, struct inode *dir);
+int btrfs_acl_chmod(struct inode *inode);
+
+/* free-space-cache.c */
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+			 u64 bytenr, u64 size);
+int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+			    u64 bytenr, u64 size);
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
+				   *block_group);
+struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
+					       *block_group, u64 offset,
+					       u64 bytes);
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+			   u64 bytes);
+u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
+#endif

+ 386 - 0
fs/btrfs/dir-item.c

@@ -0,0 +1,386 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "hash.h"
+#include "transaction.h"
+
+/*
+ * insert a name into a directory, doing overflow properly if there is a hash
+ * collision.  data_size indicates how big the item inserted should be.  On
+ * success a struct btrfs_dir_item pointer is returned, otherwise it is
+ * an ERR_PTR.
+ *
+ * The name is not copied into the dir item, you have to do that yourself.
+ */
+static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
+						   *trans,
+						   struct btrfs_root *root,
+						   struct btrfs_path *path,
+						   struct btrfs_key *cpu_key,
+						   u32 data_size,
+						   const char *name,
+						   int name_len)
+{
+	int ret;
+	char *ptr;
+	struct btrfs_item *item;
+	struct extent_buffer *leaf;
+
+	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
+	if (ret == -EEXIST) {
+		struct btrfs_dir_item *di;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (di)
+			return ERR_PTR(-EEXIST);
+		ret = btrfs_extend_item(trans, root, path, data_size);
+		WARN_ON(ret > 0);
+	}
+	if (ret < 0)
+		return ERR_PTR(ret);
+	WARN_ON(ret > 0);
+	leaf = path->nodes[0];
+	item = btrfs_item_nr(leaf, path->slots[0]);
+	ptr = btrfs_item_ptr(leaf, path->slots[0], char);
+	BUG_ON(data_size > btrfs_item_size(leaf, item));
+	ptr += btrfs_item_size(leaf, item) - data_size;
+	return (struct btrfs_dir_item *)ptr;
+}
+
+/*
+ * xattrs work a lot like directories, this inserts an xattr item
+ * into the tree
+ */
+int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, const char *name,
+			    u16 name_len, const void *data, u16 data_len,
+			    u64 dir)
+{
+	int ret = 0;
+	struct btrfs_path *path;
+	struct btrfs_dir_item *dir_item;
+	unsigned long name_ptr, data_ptr;
+	struct btrfs_key key, location;
+	struct btrfs_disk_key disk_key;
+	struct extent_buffer *leaf;
+	u32 data_size;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+	key.offset = btrfs_name_hash(name, name_len);
+	/* reject xattrs too large for a leaf before allocating the path */
+	if (name_len + data_len + sizeof(struct btrfs_dir_item) >
+	    BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
+		return -ENOSPC;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	data_size = sizeof(*dir_item) + name_len + data_len;
+	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+					name, name_len);
+	/*
+	 * FIXME: at some point we should handle xattrs that are larger than
+	 * what we can fit in our leaf.  We set location to NULL because we
+	 * aren't pointing at anything else; that will change if we store the
+	 * xattr data in a separate inode.
+	 */
+	BUG_ON(IS_ERR(dir_item));
+	memset(&location, 0, sizeof(location));
+
+	leaf = path->nodes[0];
+	btrfs_cpu_key_to_disk(&disk_key, &location);
+	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+	btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
+	btrfs_set_dir_name_len(leaf, dir_item, name_len);
+	btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+	btrfs_set_dir_data_len(leaf, dir_item, data_len);
+	name_ptr = (unsigned long)(dir_item + 1);
+	data_ptr = (unsigned long)((char *)name_ptr + name_len);
+
+	write_extent_buffer(leaf, name, name_ptr, name_len);
+	write_extent_buffer(leaf, data, data_ptr, data_len);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * insert a directory item in the tree, doing all the magic for
+ * both indexes. 'dir' indicates which objectid to insert it into,
+ * 'location' is the key to stuff into the directory item, 'type' is the
+ * type of the inode we're pointing to, and 'index' is the sequence number
+ * to use for the second index (if one is created).
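+ *
+ * Concretely, two items are normally inserted: a BTRFS_DIR_ITEM_KEY entry
+ * keyed by the hash of the name and a BTRFS_DIR_INDEX_KEY entry keyed by
+ * 'index'; the extra index entry is skipped for the tree root.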
+ */
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, const char *name, int name_len, u64 dir,
+			  struct btrfs_key *location, u8 type, u64 index)
+{
+	int ret = 0;
+	int ret2 = 0;
+	struct btrfs_path *path;
+	struct btrfs_dir_item *dir_item;
+	struct extent_buffer *leaf;
+	unsigned long name_ptr;
+	struct btrfs_key key;
+	struct btrfs_disk_key disk_key;
+	u32 data_size;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+	key.offset = btrfs_name_hash(name, name_len);
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	data_size = sizeof(*dir_item) + name_len;
+	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+					name, name_len);
+	if (IS_ERR(dir_item)) {
+		ret = PTR_ERR(dir_item);
+		if (ret == -EEXIST)
+			goto second_insert;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_cpu_key_to_disk(&disk_key, location);
+	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+	btrfs_set_dir_type(leaf, dir_item, type);
+	btrfs_set_dir_data_len(leaf, dir_item, 0);
+	btrfs_set_dir_name_len(leaf, dir_item, name_len);
+	btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+	name_ptr = (unsigned long)(dir_item + 1);
+
+	write_extent_buffer(leaf, name, name_ptr, name_len);
+	btrfs_mark_buffer_dirty(leaf);
+
+second_insert:
+	/* FIXME, use some real flag for selecting the extra index */
+	if (root == root->fs_info->tree_root) {
+		ret = 0;
+		goto out;
+	}
+	btrfs_release_path(root, path);
+
+	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+	key.offset = index;
+	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+					name, name_len);
+	if (IS_ERR(dir_item)) {
+		ret2 = PTR_ERR(dir_item);
+		goto out;
+	}
+	leaf = path->nodes[0];
+	btrfs_cpu_key_to_disk(&disk_key, location);
+	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
+	btrfs_set_dir_type(leaf, dir_item, type);
+	btrfs_set_dir_data_len(leaf, dir_item, 0);
+	btrfs_set_dir_name_len(leaf, dir_item, name_len);
+	btrfs_set_dir_transid(leaf, dir_item, trans->transid);
+	name_ptr = (unsigned long)(dir_item + 1);
+	write_extent_buffer(leaf, name, name_ptr, name_len);
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	if (ret)
+		return ret;
+	if (ret2)
+		return ret2;
+	return 0;
+}
+
+/*
+ * lookup a directory item based on name.  'dir' is the objectid
+ * we're searching in, and 'mod' tells us whether the caller plans on
+ * deleting the item (use mod < 0) or changing it (use mod > 0)
+ */
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     struct btrfs_path *path, u64 dir,
+					     const char *name, int name_len,
+					     int mod)
+{
+	int ret;
+	struct btrfs_key key;
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+
+	key.offset = btrfs_name_hash(name, name_len);
+
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0) {
+		if (path->slots[0] == 0)
+			return NULL;
+		path->slots[0]--;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+	if (found_key.objectid != dir ||
+	    btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
+	    found_key.offset != key.offset)
+		return NULL;
+
+	return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+/*
+ * lookup a directory item based on index.  'dir' is the objectid
+ * we're searching in, and 'mod' tells us if the caller plans on
+ * deleting the item (use mod < 0) or changing the options (use mod > 0)
+ *
+ * The name is used to make sure the index really points to the name you were
+ * looking for.
+ */
+struct btrfs_dir_item *
+btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path, u64 dir,
+			    u64 objectid, const char *name, int name_len,
+			    int mod)
+{
+	int ret;
+	struct btrfs_key key;
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+	key.offset = objectid;
+
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0)
+		return ERR_PTR(-ENOENT);
+	return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
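+/*
+ * lookup an xattr item in the directory 'dir'.  As with the dir item
+ * lookups above, 'mod' tells us if the caller plans on deleting the item
+ * (use mod < 0) or changing it (use mod > 0), so the search can cow
+ * blocks as required.
+ */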
+struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path, u64 dir,
+					  const char *name, u16 name_len,
+					  int mod)
+{
+	int ret;
+	struct btrfs_key key;
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+
+	key.objectid = dir;
+	btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+	key.offset = btrfs_name_hash(name, name_len);
+	ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0) {
+		if (path->slots[0] == 0)
+			return NULL;
+		path->slots[0]--;
+	}
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+	if (found_key.objectid != dir ||
+	    btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
+	    found_key.offset != key.offset)
+		return NULL;
+
+	return btrfs_match_dir_item_name(root, path, name, name_len);
+}
+
+/*
+ * helper function to look at the directory item pointed to by 'path'.
+ * This walks through all the entries in a dir item and finds the one
+ * for a specific name.
+ */
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      const char *name, int name_len)
+{
+	struct btrfs_dir_item *dir_item;
+	unsigned long name_ptr;
+	u32 total_len;
+	u32 cur = 0;
+	u32 this_len;
+	struct extent_buffer *leaf;
+
+	leaf = path->nodes[0];
+	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+	while(cur < total_len) {
+		this_len = sizeof(*dir_item) +
+			btrfs_dir_name_len(leaf, dir_item) +
+			btrfs_dir_data_len(leaf, dir_item);
+		name_ptr = (unsigned long)(dir_item + 1);
+
+		if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+		    memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+			return dir_item;
+
+		cur += this_len;
+		dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+						     this_len);
+	}
+	return NULL;
+}
+
+/*
+ * given a pointer into a directory item, delete it.  This
+ * handles items that have more than one entry in them.
+ */
+int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct btrfs_path *path,
+			      struct btrfs_dir_item *di)
+{
+
+	struct extent_buffer *leaf;
+	u32 sub_item_len;
+	u32 item_len;
+	int ret = 0;
+
+	leaf = path->nodes[0];
+	sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
+		btrfs_dir_data_len(leaf, di);
+	item_len = btrfs_item_size_nr(leaf, path->slots[0]);
+	if (sub_item_len == item_len) {
+		ret = btrfs_del_item(trans, root, path);
+	} else {
+		/* delete just this name from the middle of the item */
+		unsigned long ptr = (unsigned long)di;
+		unsigned long start;
+
+		start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+			item_len - (ptr + sub_item_len - start));
+		ret = btrfs_truncate_item(trans, root, path,
+					  item_len - sub_item_len, 1);
+	}
+	return ret;
+}

+ 2078 - 0
fs/btrfs/disk-io.c

@@ -0,0 +1,2078 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h> /* for block_sync_page */
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "volumes.h"
+#include "print-tree.h"
+#include "async-thread.h"
+#include "locking.h"
+#include "ref-cache.h"
+#include "tree-log.h"
+
+#if 0
+static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
+{
+	if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) {
+		printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
+		       (unsigned long long)extent_buffer_blocknr(buf),
+		       (unsigned long long)btrfs_header_blocknr(buf));
+		return 1;
+	}
+	return 0;
+}
+#endif
+
+static struct extent_io_ops btree_extent_io_ops;
+static void end_workqueue_fn(struct btrfs_work *work);
+
+/*
+ * end_io_wq structs are used to do processing in task context when an IO is
+ * complete.  This is used during reads to verify checksums, and it is used
+ * by writes to insert metadata for new file extents after IO is complete.
+ */
+struct end_io_wq {
+	struct bio *bio;
+	bio_end_io_t *end_io;
+	void *private;
+	struct btrfs_fs_info *info;
+	int error;
+	int metadata;
+	struct list_head list;
+	struct btrfs_work work;
+};
+
+/*
+ * async submit bios are used to offload expensive checksumming
+ * onto the worker threads.  They checksum file and metadata bios
+ * just before they are sent down the IO stack.
+ */
+struct async_submit_bio {
+	struct inode *inode;
+	struct bio *bio;
+	struct list_head list;
+	extent_submit_bio_hook_t *submit_bio_hook;
+	int rw;
+	int mirror_num;
+	struct btrfs_work work;
+};
+
+/*
+ * extents on the btree inode are pretty simple; there's one extent
+ * that covers the entire device
+ */
+struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
+				    size_t page_offset, u64 start, u64 len,
+				    int create)
+{
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	int ret;
+
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, start, len);
+	if (em) {
+		em->bdev =
+			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+		spin_unlock(&em_tree->lock);
+		goto out;
+	}
+	spin_unlock(&em_tree->lock);
+
+	em = alloc_extent_map(GFP_NOFS);
+	if (!em) {
+		em = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	em->start = 0;
+	em->len = (u64)-1;
+	em->block_start = 0;
+	em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+
+	spin_lock(&em_tree->lock);
+	ret = add_extent_mapping(em_tree, em);
+	if (ret == -EEXIST) {
+		u64 failed_start = em->start;
+		u64 failed_len = em->len;
+
+		printk("failed to insert %Lu %Lu -> %Lu into tree\n",
+		       em->start, em->len, em->block_start);
+		free_extent_map(em);
+		em = lookup_extent_mapping(em_tree, start, len);
+		if (em) {
+			printk("after failing, found %Lu %Lu %Lu\n",
+			       em->start, em->len, em->block_start);
+			ret = 0;
+		} else {
+			em = lookup_extent_mapping(em_tree, failed_start,
+						   failed_len);
+			if (em) {
+				printk("double failure lookup gives us "
+				       "%Lu %Lu -> %Lu\n", em->start,
+				       em->len, em->block_start);
+				free_extent_map(em);
+			}
+			ret = -EIO;
+		}
+	} else if (ret) {
+		free_extent_map(em);
+		em = NULL;
+	}
+	spin_unlock(&em_tree->lock);
+
+	if (ret)
+		em = ERR_PTR(ret);
+out:
+	return em;
+}
+
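+/*
+ * checksum helpers shared by the tree blocks and the super blocks.
+ * btrfs_csum_data runs crc32c over the data, and btrfs_csum_final inverts
+ * the crc and stores it in little endian byte order.
+ */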
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+{
+	return btrfs_crc32c(seed, data, len);
+}
+
+void btrfs_csum_final(u32 crc, char *result)
+{
+	*(__le32 *)result = ~cpu_to_le32(crc);
+}
+
+/*
+ * compute the csum for a btree block, and either verify it or write it
+ * into the csum field of the block.
+ */
+static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+			   int verify)
+{
+	char result[BTRFS_CRC32_SIZE];
+	unsigned long len;
+	unsigned long cur_len;
+	unsigned long offset = BTRFS_CSUM_SIZE;
+	char *map_token = NULL;
+	char *kaddr;
+	unsigned long map_start;
+	unsigned long map_len;
+	int err;
+	u32 crc = ~(u32)0;
+
+	len = buf->len - offset;
+	while(len > 0) {
+		err = map_private_extent_buffer(buf, offset, 32,
+					&map_token, &kaddr,
+					&map_start, &map_len, KM_USER0);
+		if (err) {
+			printk("failed to map extent buffer! %lu\n",
+			       offset);
+			return 1;
+		}
+		cur_len = min(len, map_len - (offset - map_start));
+		crc = btrfs_csum_data(root, kaddr + offset - map_start,
+				      crc, cur_len);
+		len -= cur_len;
+		offset += cur_len;
+		unmap_extent_buffer(buf, map_token, KM_USER0);
+	}
+	btrfs_csum_final(crc, result);
+
+	if (verify) {
+		/* FIXME, this is not good */
+		if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
+			u32 val;
+			u32 found = 0;
+			memcpy(&found, result, BTRFS_CRC32_SIZE);
+
+			read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
+			printk("btrfs: %s checksum verify failed on %llu "
+			       "wanted %X found %X level %d\n",
+			       root->fs_info->sb->s_id,
+			       buf->start, val, found, btrfs_header_level(buf));
+			return 1;
+		}
+	} else {
+		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
+	}
+	return 0;
+}
+
+/*
+ * we can't consider a given block up to date unless the transid of the
+ * block matches the transid in the parent node's pointer.  This is how we
+ * detect blocks that either didn't get written at all or got written
+ * in the wrong place.
+ */
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+				 struct extent_buffer *eb, u64 parent_transid)
+{
+	int ret;
+
+	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+		return 0;
+
+	lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS);
+	if (extent_buffer_uptodate(io_tree, eb) &&
+	    btrfs_header_generation(eb) == parent_transid) {
+		ret = 0;
+		goto out;
+	}
+	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+	       (unsigned long long)eb->start,
+	       (unsigned long long)parent_transid,
+	       (unsigned long long)btrfs_header_generation(eb));
+	ret = 1;
+	clear_extent_buffer_uptodate(io_tree, eb);
+out:
+	unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
+		      GFP_NOFS);
+	return ret;
+}
+
+/*
+ * helper to read a given tree block, doing retries as required when
+ * the checksums don't match and we have alternate mirrors to try.
+ */
+static int btree_read_extent_buffer_pages(struct btrfs_root *root,
+					  struct extent_buffer *eb,
+					  u64 start, u64 parent_transid)
+{
+	struct extent_io_tree *io_tree;
+	int ret;
+	int num_copies = 0;
+	int mirror_num = 0;
+
+	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
+	while (1) {
+		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
+					       btree_get_extent, mirror_num);
+		if (!ret &&
+		    !verify_parent_transid(io_tree, eb, parent_transid))
+			return ret;
+		printk("read extent buffer pages failed with ret %d "
+		       "mirror no %d\n", ret, mirror_num);
+		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+					      eb->start, eb->len);
+		if (num_copies == 1)
+			return ret;
+
+		mirror_num++;
+		if (mirror_num > num_copies)
+			return ret;
+	}
+	return -EIO;
+}
+
+/*
+ * checksum a dirty tree block before IO.  This has extra checks to make
+ * sure we only fill in the checksum field in the first page of a
+ * multi-page block.
+ */
+int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+{
+	struct extent_io_tree *tree;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 found_start;
+	int found_level;
+	unsigned long len;
+	struct extent_buffer *eb;
+	int ret;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+	if (page->private == EXTENT_PAGE_PRIVATE)
+		goto out;
+	if (!page->private)
+		goto out;
+	len = page->private >> 2;
+	WARN_ON(len == 0);
+	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
+					     btrfs_header_generation(eb));
+	BUG_ON(ret);
+	found_start = btrfs_header_bytenr(eb);
+	if (found_start != start) {
+		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
+		       start, found_start, len);
+		WARN_ON(1);
+		goto err;
+	}
+	if (eb->first_page != page) {
+		printk("bad first page %lu %lu\n", eb->first_page->index,
+		       page->index);
+		WARN_ON(1);
+		goto err;
+	}
+	if (!PageUptodate(page)) {
+		printk("csum not up to date page %lu\n", page->index);
+		WARN_ON(1);
+		goto err;
+	}
+	found_level = btrfs_header_level(eb);
+
+	csum_tree_block(root, eb, 0);
+err:
+	free_extent_buffer(eb);
+out:
+	return 0;
+}
+
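+/*
+ * validation hook run when a btree page finishes reading.  This rebuilds
+ * the extent buffer for the block and checks that the header bytenr, the
+ * fsid and the checksum all match what we expect, returning -EIO when
+ * they don't.
+ */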
+int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
+			       struct extent_state *state)
+{
+	struct extent_io_tree *tree;
+	u64 found_start;
+	int found_level;
+	unsigned long len;
+	struct extent_buffer *eb;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	int ret = 0;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	if (page->private == EXTENT_PAGE_PRIVATE)
+		goto out;
+	if (!page->private)
+		goto out;
+	len = page->private >> 2;
+	WARN_ON(len == 0);
+	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+
+	found_start = btrfs_header_bytenr(eb);
+	if (found_start != start) {
+		printk("bad tree block start %llu %llu\n",
+		       (unsigned long long)found_start,
+		       (unsigned long long)eb->start);
+		ret = -EIO;
+		goto err;
+	}
+	if (eb->first_page != page) {
+		printk("bad first page %lu %lu\n", eb->first_page->index,
+		       page->index);
+		WARN_ON(1);
+		ret = -EIO;
+		goto err;
+	}
+	if (memcmp_extent_buffer(eb, root->fs_info->fsid,
+				 (unsigned long)btrfs_header_fsid(eb),
+				 BTRFS_FSID_SIZE)) {
+		printk("bad fsid on block %Lu\n", eb->start);
+		ret = -EIO;
+		goto err;
+	}
+	found_level = btrfs_header_level(eb);
+
+	ret = csum_tree_block(root, eb, 1);
+	if (ret)
+		ret = -EIO;
+
+	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
+	end = eb->start + end - 1;
+err:
+	free_extent_buffer(eb);
+out:
+	return ret;
+}
+
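+/*
+ * bio end_io callback installed by btrfs_bio_wq_end_io.  The real
+ * completion work has to happen in task context, so this just hands the
+ * bio off to the endio worker threads, using a separate pool for writes.
+ */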
+static void end_workqueue_bio(struct bio *bio, int err)
+{
+	struct end_io_wq *end_io_wq = bio->bi_private;
+	struct btrfs_fs_info *fs_info;
+
+	fs_info = end_io_wq->info;
+	end_io_wq->error = err;
+	end_io_wq->work.func = end_workqueue_fn;
+	end_io_wq->work.flags = 0;
+	if (bio->bi_rw & (1 << BIO_RW))
+		btrfs_queue_worker(&fs_info->endio_write_workers,
+				   &end_io_wq->work);
+	else
+		btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+}
+
+int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+			int metadata)
+{
+	struct end_io_wq *end_io_wq;
+	end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
+	if (!end_io_wq)
+		return -ENOMEM;
+
+	end_io_wq->private = bio->bi_private;
+	end_io_wq->end_io = bio->bi_end_io;
+	end_io_wq->info = info;
+	end_io_wq->error = 0;
+	end_io_wq->bio = bio;
+	end_io_wq->metadata = metadata;
+
+	bio->bi_private = end_io_wq;
+	bio->bi_end_io = end_workqueue_bio;
+	return 0;
+}
+
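+/*
+ * the async submission paths throttle themselves against this limit so
+ * we don't build up an unbounded backlog of bios.  It scales with the
+ * number of worker threads and open devices.
+ */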
+unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
+{
+	unsigned long limit = min_t(unsigned long,
+				    info->workers.max_workers,
+				    info->fs_devices->open_devices);
+	return 256 * limit;
+}
+
+int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
+{
+	return atomic_read(&info->nr_async_bios) >
+		btrfs_async_submit_limit(info);
+}
+
+static void run_one_async_submit(struct btrfs_work *work)
+{
+	struct btrfs_fs_info *fs_info;
+	struct async_submit_bio *async;
+	int limit;
+
+	async = container_of(work, struct async_submit_bio, work);
+	fs_info = BTRFS_I(async->inode)->root->fs_info;
+
+	limit = btrfs_async_submit_limit(fs_info);
+	limit = limit * 2 / 3;
+
+	atomic_dec(&fs_info->nr_async_submits);
+
+	if (atomic_read(&fs_info->nr_async_submits) < limit &&
+	    waitqueue_active(&fs_info->async_submit_wait))
+		wake_up(&fs_info->async_submit_wait);
+
+	async->submit_bio_hook(async->inode, async->rw, async->bio,
+			       async->mirror_num);
+	kfree(async);
+}
+
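+/*
+ * queue a bio for async submission.  The submit_bio_hook, and with it the
+ * expensive checksumming, runs later from a worker thread via
+ * run_one_async_submit.  Callers are throttled here when too many async
+ * submits are already pending.
+ */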
+int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+			int rw, struct bio *bio, int mirror_num,
+			extent_submit_bio_hook_t *submit_bio_hook)
+{
+	struct async_submit_bio *async;
+	int limit = btrfs_async_submit_limit(fs_info);
+
+	async = kmalloc(sizeof(*async), GFP_NOFS);
+	if (!async)
+		return -ENOMEM;
+
+	async->inode = inode;
+	async->rw = rw;
+	async->bio = bio;
+	async->mirror_num = mirror_num;
+	async->submit_bio_hook = submit_bio_hook;
+	async->work.func = run_one_async_submit;
+	async->work.flags = 0;
+
+	while(atomic_read(&fs_info->async_submit_draining) &&
+	      atomic_read(&fs_info->nr_async_submits)) {
+		wait_event(fs_info->async_submit_wait,
+			   (atomic_read(&fs_info->nr_async_submits) == 0));
+	}
+
+	atomic_inc(&fs_info->nr_async_submits);
+	btrfs_queue_worker(&fs_info->workers, &async->work);
+
+	if (atomic_read(&fs_info->nr_async_submits) > limit) {
+		wait_event_timeout(fs_info->async_submit_wait,
+			   (atomic_read(&fs_info->nr_async_submits) < limit),
+			   HZ/10);
+
+		wait_event_timeout(fs_info->async_submit_wait,
+			   (atomic_read(&fs_info->nr_async_bios) < limit),
+			   HZ/10);
+	}
+	return 0;
+}
+
+static int btree_csum_one_bio(struct bio *bio)
+{
+	struct bio_vec *bvec = bio->bi_io_vec;
+	int bio_index = 0;
+	struct btrfs_root *root;
+
+	WARN_ON(bio->bi_vcnt <= 0);
+	while(bio_index < bio->bi_vcnt) {
+		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
+		csum_dirty_buffer(root, bvec->bv_page);
+		bio_index++;
+		bvec++;
+	}
+	return 0;
+}
+
+static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+				 int mirror_num)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	/*
+	 * when we're called for a write, we're already in the async
+	 * submission context.  Just jump into btrfs_map_bio
+	 */
+	if (rw & (1 << BIO_RW)) {
+		btree_csum_one_bio(bio);
+		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				     mirror_num, 1);
+	}
+
+	/*
+	 * called for a read, do the setup so that checksum validation
+	 * can happen in the async kernel threads
+	 */
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
+	BUG_ON(ret);
+
+	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+}
+
+static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+				 int mirror_num)
+{
+	/*
+	 * kthread helpers are used to submit writes so that checksumming
+	 * can happen in parallel across all CPUs
+	 */
+	if (!(rw & (1 << BIO_RW))) {
+		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
+	}
+	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+				   inode, rw, bio, mirror_num,
+				   __btree_submit_bio_hook);
+}
+
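+/*
+ * writepage/writepages for the btree inode.  Calls made from memory
+ * reclaim are redirtied instead of blocking, and background writeback is
+ * skipped unless enough metadata is actually dirty.
+ */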
+static int btree_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+	if (current->flags & PF_MEMALLOC) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+	return extent_write_full_page(tree, page, btree_get_extent, wbc);
+}
+
+static int btree_writepages(struct address_space *mapping,
+			    struct writeback_control *wbc)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(mapping->host)->io_tree;
+	if (wbc->sync_mode == WB_SYNC_NONE) {
+		u64 num_dirty;
+		u64 start = 0;
+		unsigned long thresh = 32 * 1024 * 1024;
+
+		if (wbc->for_kupdate)
+			return 0;
+
+		num_dirty = count_range_bits(tree, &start, (u64)-1,
+					     thresh, EXTENT_DIRTY);
+		if (num_dirty < thresh) {
+			return 0;
+		}
+	}
+	return extent_writepages(tree, mapping, btree_get_extent, wbc);
+}
+
+int btree_readpage(struct file *file, struct page *page)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	return extent_read_full_page(tree, page, btree_get_extent);
+}
+
+static int btree_releasepage(struct page *page, gfp_t gfp_flags)
+{
+	struct extent_io_tree *tree;
+	struct extent_map_tree *map;
+	int ret;
+
+	if (PageWriteback(page) || PageDirty(page))
+		return 0;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	map = &BTRFS_I(page->mapping->host)->extent_tree;
+
+	ret = try_release_extent_state(map, tree, page, gfp_flags);
+	if (!ret) {
+		return 0;
+	}
+
+	ret = try_release_extent_buffer(tree, page);
+	if (ret == 1) {
+		ClearPagePrivate(page);
+		set_page_private(page, 0);
+		page_cache_release(page);
+	}
+
+	return ret;
+}
+
+static void btree_invalidatepage(struct page *page, unsigned long offset)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	extent_invalidatepage(tree, page, offset);
+	btree_releasepage(page, GFP_NOFS);
+	if (PagePrivate(page)) {
+		printk("warning page private not zero on page %Lu\n",
+		       page_offset(page));
+		ClearPagePrivate(page);
+		set_page_private(page, 0);
+		page_cache_release(page);
+	}
+}
+
+#if 0
+static int btree_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct buffer_head *bh;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	struct buffer_head *head;
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
+					(1 << BH_Dirty)|(1 << BH_Uptodate));
+	}
+	head = page_buffers(page);
+	bh = head;
+	do {
+		if (buffer_dirty(bh))
+			csum_tree_block(root, bh, 0);
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return block_write_full_page(page, btree_get_block, wbc);
+}
+#endif
+
+static struct address_space_operations btree_aops = {
+	.readpage	= btree_readpage,
+	.writepage	= btree_writepage,
+	.writepages	= btree_writepages,
+	.releasepage	= btree_releasepage,
+	.invalidatepage = btree_invalidatepage,
+	.sync_page	= block_sync_page,
+};
+
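+/*
+ * start a readahead of the tree block at 'bytenr'.  The read is issued
+ * without waiting for completion and without checking the result.
+ */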
+int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+			 u64 parent_transid)
+{
+	struct extent_buffer *buf = NULL;
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	int ret = 0;
+
+	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	if (!buf)
+		return 0;
+	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
+				 buf, 0, 0, btree_get_extent, 0);
+	free_extent_buffer(buf);
+	return ret;
+}
+
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+					    u64 bytenr, u32 blocksize)
+{
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	struct extent_buffer *eb;
+	eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
+				bytenr, blocksize, GFP_NOFS);
+	return eb;
+}
+
+struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
+						 u64 bytenr, u32 blocksize)
+{
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	struct extent_buffer *eb;
+
+	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
+				 bytenr, blocksize, NULL, GFP_NOFS);
+	return eb;
+}
+
+
+int btrfs_write_tree_block(struct extent_buffer *buf)
+{
+	return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start,
+				      buf->start + buf->len - 1, WB_SYNC_ALL);
+}
+
+int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
+{
+	return btrfs_wait_on_page_writeback_range(buf->first_page->mapping,
+				  buf->start, buf->start + buf->len -1);
+}
+
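+/*
+ * read the tree block at 'bytenr' into memory, verifying the checksum
+ * and the parent transid and falling back to other mirrors when the
+ * copy we read is bad.
+ */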
+struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
+				      u32 blocksize, u64 parent_transid)
+{
+	struct extent_buffer *buf = NULL;
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	struct extent_io_tree *io_tree;
+	int ret;
+
+	io_tree = &BTRFS_I(btree_inode)->io_tree;
+
+	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	if (!buf)
+		return NULL;
+
+	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
+
+	if (ret == 0) {
+		buf->flags |= EXTENT_UPTODATE;
+	} else {
+		WARN_ON(1);
+	}
+	return buf;
+
+}
+
+int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		     struct extent_buffer *buf)
+{
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	if (btrfs_header_generation(buf) ==
+	    root->fs_info->running_transaction->transid) {
+		WARN_ON(!btrfs_tree_locked(buf));
+		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
+					  buf);
+	}
+	return 0;
+}
+
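+/*
+ * initialize the in-memory fields of a btrfs_root to sane defaults.
+ * Every root, including the log and tree roots, goes through this before
+ * its root item is filled in from disk.
+ */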
+static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+			u32 stripesize, struct btrfs_root *root,
+			struct btrfs_fs_info *fs_info,
+			u64 objectid)
+{
+	root->node = NULL;
+	root->inode = NULL;
+	root->commit_root = NULL;
+	root->ref_tree = NULL;
+	root->sectorsize = sectorsize;
+	root->nodesize = nodesize;
+	root->leafsize = leafsize;
+	root->stripesize = stripesize;
+	root->ref_cows = 0;
+	root->track_dirty = 0;
+
+	root->fs_info = fs_info;
+	root->objectid = objectid;
+	root->last_trans = 0;
+	root->highest_inode = 0;
+	root->last_inode_alloc = 0;
+	root->name = NULL;
+	root->in_sysfs = 0;
+
+	INIT_LIST_HEAD(&root->dirty_list);
+	INIT_LIST_HEAD(&root->orphan_list);
+	INIT_LIST_HEAD(&root->dead_list);
+	spin_lock_init(&root->node_lock);
+	spin_lock_init(&root->list_lock);
+	mutex_init(&root->objectid_mutex);
+	mutex_init(&root->log_mutex);
+	extent_io_tree_init(&root->dirty_log_pages,
+			     fs_info->btree_inode->i_mapping, GFP_NOFS);
+
+	btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
+	root->ref_tree = &root->ref_tree_struct;
+
+	memset(&root->root_key, 0, sizeof(root->root_key));
+	memset(&root->root_item, 0, sizeof(root->root_item));
+	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
+	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
+	root->defrag_trans_start = fs_info->generation;
+	init_completion(&root->kobj_unregister);
+	root->defrag_running = 0;
+	root->defrag_level = 0;
+	root->root_key.objectid = objectid;
+	return 0;
+}
+
+static int find_and_setup_root(struct btrfs_root *tree_root,
+			       struct btrfs_fs_info *fs_info,
+			       u64 objectid,
+			       struct btrfs_root *root)
+{
+	int ret;
+	u32 blocksize;
+
+	__setup_root(tree_root->nodesize, tree_root->leafsize,
+		     tree_root->sectorsize, tree_root->stripesize,
+		     root, fs_info, objectid);
+	ret = btrfs_find_last_root(tree_root, objectid,
+				   &root->root_item, &root->root_key);
+	BUG_ON(ret);
+
+	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
+	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
+				     blocksize, 0);
+	BUG_ON(!root->node);
+	return 0;
+}
+
+int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info)
+{
+	struct extent_buffer *eb;
+	struct btrfs_root *log_root_tree = fs_info->log_root_tree;
+	u64 start = 0;
+	u64 end = 0;
+	int ret;
+
+	if (!log_root_tree)
+		return 0;
+
+	while(1) {
+		ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
+				    0, &start, &end, EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_dirty(&log_root_tree->dirty_log_pages,
+				   start, end, GFP_NOFS);
+	}
+	eb = fs_info->log_root_tree->node;
+
+	WARN_ON(btrfs_header_level(eb) != 0);
+	WARN_ON(btrfs_header_nritems(eb) != 0);
+
+	ret = btrfs_free_reserved_extent(fs_info->tree_root,
+				eb->start, eb->len);
+	BUG_ON(ret);
+
+	free_extent_buffer(eb);
+	kfree(fs_info->log_root_tree);
+	fs_info->log_root_tree = NULL;
+	return 0;
+}
+
+int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *root;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+
+	root = kzalloc(sizeof(*root), GFP_NOFS);
+	if (!root)
+		return -ENOMEM;
+
+	__setup_root(tree_root->nodesize, tree_root->leafsize,
+		     tree_root->sectorsize, tree_root->stripesize,
+		     root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+
+	root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
+	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+	root->ref_cows = 0;
+
+	root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
+					    0, BTRFS_TREE_LOG_OBJECTID,
+					    trans->transid, 0, 0, 0);
+
+	btrfs_set_header_nritems(root->node, 0);
+	btrfs_set_header_level(root->node, 0);
+	btrfs_set_header_bytenr(root->node, root->node->start);
+	btrfs_set_header_generation(root->node, trans->transid);
+	btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
+
+	write_extent_buffer(root->node, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(root->node),
+			    BTRFS_FSID_SIZE);
+	btrfs_mark_buffer_dirty(root->node);
+	btrfs_tree_unlock(root->node);
+	fs_info->log_root_tree = root;
+	return 0;
+}
+
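+/*
+ * read a root from the tree of tree roots based on its key.  This does
+ * not consult or update the radix tree of cached roots; callers that
+ * want caching use btrfs_read_fs_root_no_name below.
+ */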
+struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
+					       struct btrfs_key *location)
+{
+	struct btrfs_root *root;
+	struct btrfs_fs_info *fs_info = tree_root->fs_info;
+	struct btrfs_path *path;
+	struct extent_buffer *l;
+	u64 highest_inode;
+	u32 blocksize;
+	int ret = 0;
+
+	root = kzalloc(sizeof(*root), GFP_NOFS);
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+	if (location->offset == (u64)-1) {
+		ret = find_and_setup_root(tree_root, fs_info,
+					  location->objectid, root);
+		if (ret) {
+			kfree(root);
+			return ERR_PTR(ret);
+		}
+		goto insert;
+	}
+
+	__setup_root(tree_root->nodesize, tree_root->leafsize,
+		     tree_root->sectorsize, tree_root->stripesize,
+		     root, fs_info, location->objectid);
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
+	if (ret != 0) {
+		if (ret > 0)
+			ret = -ENOENT;
+		goto out;
+	}
+	l = path->nodes[0];
+	read_extent_buffer(l, &root->root_item,
+	       btrfs_item_ptr_offset(l, path->slots[0]),
+	       sizeof(root->root_item));
+	memcpy(&root->root_key, location, sizeof(*location));
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
+	if (ret) {
+		kfree(root);
+		return ERR_PTR(ret);
+	}
+	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
+	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
+				     blocksize, 0);
+	BUG_ON(!root->node);
+insert:
+	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+		root->ref_cows = 1;
+		ret = btrfs_find_highest_inode(root, &highest_inode);
+		if (ret == 0) {
+			root->highest_inode = highest_inode;
+			root->last_inode_alloc = highest_inode;
+		}
+	}
+	return root;
+}
+
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_objectid)
+{
+	struct btrfs_root *root;
+
+	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
+		return fs_info->tree_root;
+	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
+		return fs_info->extent_root;
+
+	root = radix_tree_lookup(&fs_info->fs_roots_radix,
+				 (unsigned long)root_objectid);
+	return root;
+}
+
+struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
+					      struct btrfs_key *location)
+{
+	struct btrfs_root *root;
+	int ret;
+
+	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
+		return fs_info->tree_root;
+	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
+		return fs_info->extent_root;
+	if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
+		return fs_info->chunk_root;
+	if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
+		return fs_info->dev_root;
+
+	root = radix_tree_lookup(&fs_info->fs_roots_radix,
+				 (unsigned long)location->objectid);
+	if (root)
+		return root;
+
+	root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
+	if (IS_ERR(root))
+		return root;
+	ret = radix_tree_insert(&fs_info->fs_roots_radix,
+				(unsigned long)root->root_key.objectid,
+				root);
+	if (ret) {
+		free_extent_buffer(root->node);
+		kfree(root);
+		return ERR_PTR(ret);
+	}
+	ret = btrfs_find_dead_roots(fs_info->tree_root,
+				    root->root_key.objectid, root);
+	BUG_ON(ret);
+
+	return root;
+}
+
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+				      struct btrfs_key *location,
+				      const char *name, int namelen)
+{
+	struct btrfs_root *root;
+	int ret;
+
+	root = btrfs_read_fs_root_no_name(fs_info, location);
+	if (!root)
+		return NULL;
+
+	if (root->in_sysfs)
+		return root;
+
+	ret = btrfs_set_root_name(root, name, namelen);
+	if (ret) {
+		free_extent_buffer(root->node);
+		kfree(root);
+		return ERR_PTR(ret);
+	}
+
+	ret = btrfs_sysfs_add_root(root);
+	if (ret) {
+		free_extent_buffer(root->node);
+		kfree(root->name);
+		kfree(root);
+		return ERR_PTR(ret);
+	}
+	root->in_sysfs = 1;
+	return root;
+}
+#if 0
+static int add_hasher(struct btrfs_fs_info *info, char *type) {
+	struct btrfs_hasher *hasher;
+
+	hasher = kmalloc(sizeof(*hasher), GFP_NOFS);
+	if (!hasher)
+		return -ENOMEM;
+	hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
+	if (!hasher->hash_tfm) {
+		kfree(hasher);
+		return -EINVAL;
+	}
+	spin_lock(&info->hash_lock);
+	list_add(&hasher->list, &info->hashers);
+	spin_unlock(&info->hash_lock);
+	return 0;
+}
+#endif
+
+static int btrfs_congested_fn(void *congested_data, int bdi_bits)
+{
+	struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
+	int ret = 0;
+	struct list_head *cur;
+	struct btrfs_device *device;
+	struct backing_dev_info *bdi;
+
+	if ((bdi_bits & (1 << BDI_write_congested)) &&
+	    btrfs_congested_async(info, 0))
+		return 1;
+
+	list_for_each(cur, &info->fs_devices->devices) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (!device->bdev)
+			continue;
+		bdi = blk_get_backing_dev_info(device->bdev);
+		if (bdi && bdi_congested(bdi, bdi_bits)) {
+			ret = 1;
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * this unplugs every device on the box, and it is only used when page
+ * is null
+ */
+static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
+{
+	struct list_head *cur;
+	struct btrfs_device *device;
+	struct btrfs_fs_info *info;
+
+	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
+	list_for_each(cur, &info->fs_devices->devices) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		bdi = blk_get_backing_dev_info(device->bdev);
+		if (bdi->unplug_io_fn) {
+			bdi->unplug_io_fn(bdi, page);
+		}
+	}
+}
+
+void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
+{
+	struct inode *inode;
+	struct extent_map_tree *em_tree;
+	struct extent_map *em;
+	struct address_space *mapping;
+	u64 offset;
+
+	/* the generic O_DIRECT read code does this */
+	if (!page) {
+		__unplug_io_fn(bdi, page);
+		return;
+	}
+
+	/*
+	 * page->mapping may change at any time.  Get a consistent copy
+	 * and use that for everything below
+	 */
+	smp_mb();
+	mapping = page->mapping;
+	if (!mapping)
+		return;
+
+	inode = mapping->host;
+	offset = page_offset(page);
+
+	em_tree = &BTRFS_I(inode)->extent_tree;
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
+	spin_unlock(&em_tree->lock);
+	if (!em) {
+		__unplug_io_fn(bdi, page);
+		return;
+	}
+
+	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+		free_extent_map(em);
+		__unplug_io_fn(bdi, page);
+		return;
+	}
+	offset = offset - em->start;
+	btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
+			  em->block_start + offset, page);
+	free_extent_map(em);
+}
+
+static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
+{
+	bdi_init(bdi);
+	bdi->ra_pages	= default_backing_dev_info.ra_pages;
+	bdi->state		= 0;
+	bdi->capabilities	= default_backing_dev_info.capabilities;
+	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
+	bdi->unplug_io_data	= info;
+	bdi->congested_fn	= btrfs_congested_fn;
+	bdi->congested_data	= info;
+	return 0;
+}
+
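+/*
+ * a single tree block can span more than one bio when the blocksize is
+ * larger than the page size.  Figure out whether everything the block
+ * needs is either covered by this bio or already up to date in the page
+ * cache, so checksum verification can safely run.
+ */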
+static int bio_ready_for_csum(struct bio *bio)
+{
+	u64 length = 0;
+	u64 buf_len = 0;
+	u64 start = 0;
+	struct page *page;
+	struct extent_io_tree *io_tree = NULL;
+	struct btrfs_fs_info *info = NULL;
+	struct bio_vec *bvec;
+	int i;
+	int ret;
+
+	bio_for_each_segment(bvec, bio, i) {
+		page = bvec->bv_page;
+		if (page->private == EXTENT_PAGE_PRIVATE) {
+			length += bvec->bv_len;
+			continue;
+		}
+		if (!page->private) {
+			length += bvec->bv_len;
+			continue;
+		}
+		length = bvec->bv_len;
+		buf_len = page->private >> 2;
+		start = page_offset(page) + bvec->bv_offset;
+		io_tree = &BTRFS_I(page->mapping->host)->io_tree;
+		info = BTRFS_I(page->mapping->host)->root->fs_info;
+	}
+	/* are we fully contained in this bio? */
+	if (buf_len <= length)
+		return 1;
+
+	ret = extent_range_uptodate(io_tree, start + length,
+				    start + buf_len - 1);
+	return ret;
+}
+
+/*
+ * called by the kthread helper functions to finally call the bio end_io
+ * functions.  This is where read checksum verification actually happens
+ */
+static void end_workqueue_fn(struct btrfs_work *work)
+{
+	struct bio *bio;
+	struct end_io_wq *end_io_wq;
+	struct btrfs_fs_info *fs_info;
+	int error;
+
+	end_io_wq = container_of(work, struct end_io_wq, work);
+	bio = end_io_wq->bio;
+	fs_info = end_io_wq->info;
+
+	/* metadata bios are special because the whole tree block must
+	 * be checksummed at once.  This makes sure the entire block is in
+	 * ram and up to date before trying to verify things.  For
+	 * blocksize <= pagesize, it is basically a noop
+	 */
+	if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
+		btrfs_queue_worker(&fs_info->endio_workers,
+				   &end_io_wq->work);
+		return;
+	}
+	error = end_io_wq->error;
+	bio->bi_private = end_io_wq->private;
+	bio->bi_end_io = end_io_wq->end_io;
+	kfree(end_io_wq);
+	bio_endio(bio, error);
+}
+
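+/*
+ * the cleaner kthread periodically removes snapshots that have been
+ * deleted, and the transaction kthread below forces a commit when the
+ * running transaction gets too old.
+ */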
+static int cleaner_kthread(void *arg)
+{
+	struct btrfs_root *root = arg;
+
+	do {
+		smp_mb();
+		if (root->fs_info->closing)
+			break;
+
+		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+		mutex_lock(&root->fs_info->cleaner_mutex);
+		btrfs_clean_old_snapshots(root);
+		mutex_unlock(&root->fs_info->cleaner_mutex);
+
+		if (freezing(current)) {
+			refrigerator();
+		} else {
+			smp_mb();
+			if (root->fs_info->closing)
+				break;
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+static int transaction_kthread(void *arg)
+{
+	struct btrfs_root *root = arg;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_transaction *cur;
+	unsigned long now;
+	unsigned long delay;
+	int ret;
+
+	do {
+		smp_mb();
+		if (root->fs_info->closing)
+			break;
+
+		delay = HZ * 30;
+		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+		mutex_lock(&root->fs_info->transaction_kthread_mutex);
+
+		if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
+			printk("btrfs: total reference cache size %Lu\n",
+				root->fs_info->total_ref_cache_size);
+		}
+
+		mutex_lock(&root->fs_info->trans_mutex);
+		cur = root->fs_info->running_transaction;
+		if (!cur) {
+			mutex_unlock(&root->fs_info->trans_mutex);
+			goto sleep;
+		}
+
+		now = get_seconds();
+		if (now < cur->start_time || now - cur->start_time < 30) {
+			mutex_unlock(&root->fs_info->trans_mutex);
+			delay = HZ * 5;
+			goto sleep;
+		}
+		mutex_unlock(&root->fs_info->trans_mutex);
+		trans = btrfs_start_transaction(root, 1);
+		ret = btrfs_commit_transaction(trans, root);
+sleep:
+		wake_up_process(root->fs_info->cleaner_kthread);
+		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+
+		if (freezing(current)) {
+			refrigerator();
+		} else {
+			if (root->fs_info->closing)
+				break;
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(delay);
+			__set_current_state(TASK_RUNNING);
+		}
+	} while (!kthread_should_stop());
+	return 0;
+}
+
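+/*
+ * open_ctree does the work to bring a filesystem up: it reads the super
+ * block, starts the worker threads and helper kthreads, and reads the
+ * chunk, device, root, extent and (optionally) log trees.  Returns the
+ * tree root on success or an ERR_PTR on failure.
+ */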
+struct btrfs_root *open_ctree(struct super_block *sb,
+			      struct btrfs_fs_devices *fs_devices,
+			      char *options)
+{
+	u32 sectorsize;
+	u32 nodesize;
+	u32 leafsize;
+	u32 blocksize;
+	u32 stripesize;
+	struct buffer_head *bh;
+	struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
+						 GFP_NOFS);
+	struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
+					       GFP_NOFS);
+	struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
+						GFP_NOFS);
+	struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
+						GFP_NOFS);
+	struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
+					      GFP_NOFS);
+	struct btrfs_root *log_tree_root;
+
+	int ret;
+	int err = -EINVAL;
+
+	struct btrfs_super_block *disk_super;
+
+	if (!extent_root || !tree_root || !fs_info ||
+	    !chunk_root || !dev_root) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+	INIT_LIST_HEAD(&fs_info->trans_list);
+	INIT_LIST_HEAD(&fs_info->dead_roots);
+	INIT_LIST_HEAD(&fs_info->hashers);
+	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
+	spin_lock_init(&fs_info->hash_lock);
+	spin_lock_init(&fs_info->delalloc_lock);
+	spin_lock_init(&fs_info->new_trans_lock);
+	spin_lock_init(&fs_info->ref_cache_lock);
+
+	init_completion(&fs_info->kobj_unregister);
+	fs_info->tree_root = tree_root;
+	fs_info->extent_root = extent_root;
+	fs_info->chunk_root = chunk_root;
+	fs_info->dev_root = dev_root;
+	fs_info->fs_devices = fs_devices;
+	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
+	INIT_LIST_HEAD(&fs_info->space_info);
+	btrfs_mapping_init(&fs_info->mapping_tree);
+	atomic_set(&fs_info->nr_async_submits, 0);
+	atomic_set(&fs_info->async_submit_draining, 0);
+	atomic_set(&fs_info->nr_async_bios, 0);
+	atomic_set(&fs_info->throttles, 0);
+	atomic_set(&fs_info->throttle_gen, 0);
+	fs_info->sb = sb;
+	fs_info->max_extent = (u64)-1;
+	fs_info->max_inline = 8192 * 1024;
+	setup_bdi(fs_info, &fs_info->bdi);
+	fs_info->btree_inode = new_inode(sb);
+	fs_info->btree_inode->i_ino = 1;
+	fs_info->btree_inode->i_nlink = 1;
+	fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);
+
+	INIT_LIST_HEAD(&fs_info->ordered_extents);
+	spin_lock_init(&fs_info->ordered_extent_lock);
+
+	sb->s_blocksize = 4096;
+	sb->s_blocksize_bits = blksize_bits(4096);
+
+	/*
+	 * we set the i_size on the btree inode to the largest possible
+	 * offset.  The real end of the address space is determined by
+	 * all of the devices in the system.
+	 */
+	fs_info->btree_inode->i_size = OFFSET_MAX;
+	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
+	fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
+
+	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
+			     fs_info->btree_inode->i_mapping,
+			     GFP_NOFS);
+	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
+			     GFP_NOFS);
+
+	BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
+
+	spin_lock_init(&fs_info->block_group_cache_lock);
+	fs_info->block_group_cache_tree.rb_node = NULL;
+
+	extent_io_tree_init(&fs_info->pinned_extents,
+			     fs_info->btree_inode->i_mapping, GFP_NOFS);
+	extent_io_tree_init(&fs_info->pending_del,
+			     fs_info->btree_inode->i_mapping, GFP_NOFS);
+	extent_io_tree_init(&fs_info->extent_ins,
+			     fs_info->btree_inode->i_mapping, GFP_NOFS);
+	fs_info->do_barriers = 1;
+
+	extent_io_tree_init(&fs_info->reloc_mapping_tree,
+			    fs_info->btree_inode->i_mapping, GFP_NOFS);
+	INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
+	btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
+	btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
+
+	BTRFS_I(fs_info->btree_inode)->root = tree_root;
+	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+	       sizeof(struct btrfs_key));
+	insert_inode_hash(fs_info->btree_inode);
+
+	mutex_init(&fs_info->trans_mutex);
+	mutex_init(&fs_info->tree_log_mutex);
+	mutex_init(&fs_info->drop_mutex);
+	mutex_init(&fs_info->alloc_mutex);
+	mutex_init(&fs_info->chunk_mutex);
+	mutex_init(&fs_info->transaction_kthread_mutex);
+	mutex_init(&fs_info->cleaner_mutex);
+	mutex_init(&fs_info->volume_mutex);
+	mutex_init(&fs_info->tree_reloc_mutex);
+	init_waitqueue_head(&fs_info->transaction_throttle);
+	init_waitqueue_head(&fs_info->transaction_wait);
+	init_waitqueue_head(&fs_info->async_submit_wait);
+	init_waitqueue_head(&fs_info->tree_log_wait);
+	atomic_set(&fs_info->tree_log_commit, 0);
+	atomic_set(&fs_info->tree_log_writers, 0);
+	fs_info->tree_log_transid = 0;
+
+#if 0
+	ret = add_hasher(fs_info, "crc32c");
+	if (ret) {
+		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
+		err = -ENOMEM;
+		goto fail_iput;
+	}
+#endif
+	__setup_root(4096, 4096, 4096, 4096, tree_root,
+		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
+
+
+	bh = __bread(fs_devices->latest_bdev,
+		     BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+	if (!bh)
+		goto fail_iput;
+
+	memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
+	brelse(bh);
+
+	memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE);
+
+	disk_super = &fs_info->super_copy;
+	if (!btrfs_super_root(disk_super))
+		goto fail_sb_buffer;
+
+	err = btrfs_parse_options(tree_root, options);
+	if (err)
+		goto fail_sb_buffer;
+
+	/*
+	 * we need to start all the end_io workers up front because the
+	 * queue work function gets called at interrupt time, and so it
+	 * cannot dynamically grow.
+	 */
+	btrfs_init_workers(&fs_info->workers, "worker",
+			   fs_info->thread_pool_size);
+	btrfs_init_workers(&fs_info->submit_workers, "submit",
+			   min_t(u64, fs_devices->num_devices,
+			   fs_info->thread_pool_size));
+
+	/* a higher idle thresh on the submit workers makes it much more
+	 * likely that bios will be sent down in a sane order to the
+	 * devices
+	 */
+	fs_info->submit_workers.idle_thresh = 64;
+
+	/* fs_info->workers is responsible for checksumming file data
+	 * blocks and metadata.  Using a larger idle thresh allows each
+	 * worker thread to operate on things in roughly the order they
+	 * were sent by the writeback daemons, improving overall locality
+	 * of the IO going down the pipe.
+	 */
+	fs_info->workers.idle_thresh = 128;
+
+	btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
+	btrfs_init_workers(&fs_info->endio_workers, "endio",
+			   fs_info->thread_pool_size);
+	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
+			   fs_info->thread_pool_size);
+
+	/*
+	 * endios are largely parallel and should have a very
+	 * low idle thresh
+	 */
+	fs_info->endio_workers.idle_thresh = 4;
+	fs_info->endio_write_workers.idle_thresh = 64;
+
+	btrfs_start_workers(&fs_info->workers, 1);
+	btrfs_start_workers(&fs_info->submit_workers, 1);
+	btrfs_start_workers(&fs_info->fixup_workers, 1);
+	btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+	btrfs_start_workers(&fs_info->endio_write_workers,
+			    fs_info->thread_pool_size);
+
+	err = -EINVAL;
+	if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
+		printk("Btrfs: wanted %llu devices, but found %llu\n",
+		       (unsigned long long)btrfs_super_num_devices(disk_super),
+		       (unsigned long long)fs_devices->open_devices);
+		if (btrfs_test_opt(tree_root, DEGRADED))
+			printk("continuing in degraded mode\n");
+		else
+			goto fail_sb_buffer;
+	}
+
+	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
+
+	nodesize = btrfs_super_nodesize(disk_super);
+	leafsize = btrfs_super_leafsize(disk_super);
+	sectorsize = btrfs_super_sectorsize(disk_super);
+	stripesize = btrfs_super_stripesize(disk_super);
+	tree_root->nodesize = nodesize;
+	tree_root->leafsize = leafsize;
+	tree_root->sectorsize = sectorsize;
+	tree_root->stripesize = stripesize;
+
+	sb->s_blocksize = sectorsize;
+	sb->s_blocksize_bits = blksize_bits(sectorsize);
+
+	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+		    sizeof(disk_super->magic))) {
+		printk("btrfs: valid FS not found on %s\n", sb->s_id);
+		goto fail_sb_buffer;
+	}
+
+	mutex_lock(&fs_info->chunk_mutex);
+	ret = btrfs_read_sys_array(tree_root);
+	mutex_unlock(&fs_info->chunk_mutex);
+	if (ret) {
+		printk("btrfs: failed to read the system array on %s\n",
+		       sb->s_id);
+		goto fail_sys_array;
+	}
+
+	blocksize = btrfs_level_size(tree_root,
+				     btrfs_super_chunk_root_level(disk_super));
+
+	__setup_root(nodesize, leafsize, sectorsize, stripesize,
+		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+
+	chunk_root->node = read_tree_block(chunk_root,
+					   btrfs_super_chunk_root(disk_super),
+					   blocksize, 0);
+	BUG_ON(!chunk_root->node);
+
+	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
+	         (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
+		 BTRFS_UUID_SIZE);
+
+	mutex_lock(&fs_info->chunk_mutex);
+	ret = btrfs_read_chunk_tree(chunk_root);
+	mutex_unlock(&fs_info->chunk_mutex);
+	BUG_ON(ret);
+
+	btrfs_close_extra_devices(fs_devices);
+
+	blocksize = btrfs_level_size(tree_root,
+				     btrfs_super_root_level(disk_super));
+
+
+	tree_root->node = read_tree_block(tree_root,
+					  btrfs_super_root(disk_super),
+					  blocksize, 0);
+	if (!tree_root->node)
+		goto fail_sb_buffer;
+
+
+	ret = find_and_setup_root(tree_root, fs_info,
+				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
+	if (ret)
+		goto fail_tree_root;
+	extent_root->track_dirty = 1;
+
+	ret = find_and_setup_root(tree_root, fs_info,
+				  BTRFS_DEV_TREE_OBJECTID, dev_root);
+	dev_root->track_dirty = 1;
+
+	if (ret)
+		goto fail_extent_root;
+
+	btrfs_read_block_groups(extent_root);
+
+	fs_info->generation = btrfs_super_generation(disk_super) + 1;
+	fs_info->data_alloc_profile = (u64)-1;
+	fs_info->metadata_alloc_profile = (u64)-1;
+	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
+					       "btrfs-cleaner");
+	if (!fs_info->cleaner_kthread)
+		goto fail_extent_root;
+
+	fs_info->transaction_kthread = kthread_run(transaction_kthread,
+						   tree_root,
+						   "btrfs-transaction");
+	if (!fs_info->transaction_kthread)
+		goto fail_cleaner;
+
+	if (btrfs_super_log_root(disk_super) != 0) {
+		u32 blocksize;
+		u64 bytenr = btrfs_super_log_root(disk_super);
+
+		blocksize =
+		     btrfs_level_size(tree_root,
+				      btrfs_super_log_root_level(disk_super));
+
+		log_tree_root = kzalloc(sizeof(struct btrfs_root),
+						      GFP_NOFS);
+
+		__setup_root(nodesize, leafsize, sectorsize, stripesize,
+			     log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+
+		log_tree_root->node = read_tree_block(tree_root, bytenr,
+						      blocksize, 0);
+		ret = btrfs_recover_log_trees(log_tree_root);
+		BUG_ON(ret);
+	}
+
+	ret = btrfs_cleanup_reloc_trees(tree_root);
+	BUG_ON(ret);
+
+	fs_info->last_trans_committed = btrfs_super_generation(disk_super);
+	return tree_root;
+
+fail_cleaner:
+	kthread_stop(fs_info->cleaner_kthread);
+fail_extent_root:
+	free_extent_buffer(extent_root->node);
+fail_tree_root:
+	free_extent_buffer(tree_root->node);
+fail_sys_array:
+fail_sb_buffer:
+	btrfs_stop_workers(&fs_info->fixup_workers);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
+	btrfs_stop_workers(&fs_info->endio_write_workers);
+	btrfs_stop_workers(&fs_info->submit_workers);
+fail_iput:
+	iput(fs_info->btree_inode);
+fail:
+	btrfs_close_devices(fs_info->fs_devices);
+	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
+	kfree(extent_root);
+	kfree(tree_root);
+	bdi_destroy(&fs_info->bdi);
+	kfree(fs_info);
+	kfree(chunk_root);
+	kfree(dev_root);
+	return ERR_PTR(err);
+}
+
+static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
+{
+	char b[BDEVNAME_SIZE];
+
+	if (uptodate) {
+		set_buffer_uptodate(bh);
+	} else {
+		if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+			printk(KERN_WARNING "lost page write due to "
+					"I/O error on %s\n",
+				       bdevname(bh->b_bdev, b));
+		}
+		/* note, we don't set_buffer_write_io_error because we have
+		 * our own ways of dealing with the IO errors
+		 */
+		clear_buffer_uptodate(bh);
+	}
+	unlock_buffer(bh);
+	put_bh(bh);
+}
+
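+/*
+ * write a copy of the super block to every device in the filesystem.
+ * Barriers are used where they are enabled and supported, and a limited
+ * number of per-device write errors is tolerated before we give up.
+ */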
+int write_all_supers(struct btrfs_root *root)
+{
+	struct list_head *cur;
+	struct list_head *head = &root->fs_info->fs_devices->devices;
+	struct btrfs_device *dev;
+	struct btrfs_super_block *sb;
+	struct btrfs_dev_item *dev_item;
+	struct buffer_head *bh;
+	int ret;
+	int do_barriers;
+	int max_errors;
+	int total_errors = 0;
+	u32 crc;
+	u64 flags;
+
+	max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
+	do_barriers = !btrfs_test_opt(root, NOBARRIER);
+
+	sb = &root->fs_info->super_for_commit;
+	dev_item = &sb->dev_item;
+	list_for_each(cur, head) {
+		dev = list_entry(cur, struct btrfs_device, dev_list);
+		if (!dev->bdev) {
+			total_errors++;
+			continue;
+		}
+		if (!dev->in_fs_metadata)
+			continue;
+
+		btrfs_set_stack_device_type(dev_item, dev->type);
+		btrfs_set_stack_device_id(dev_item, dev->devid);
+		btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
+		btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
+		btrfs_set_stack_device_io_align(dev_item, dev->io_align);
+		btrfs_set_stack_device_io_width(dev_item, dev->io_width);
+		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
+		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
+		flags = btrfs_super_flags(sb);
+		btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
+
+
+		crc = ~(u32)0;
+		crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc,
+				      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+		btrfs_csum_final(crc, sb->csum);
+
+		bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096,
+			      BTRFS_SUPER_INFO_SIZE);
+
+		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
+		dev->pending_io = bh;
+
+		get_bh(bh);
+		set_buffer_uptodate(bh);
+		lock_buffer(bh);
+		bh->b_end_io = btrfs_end_buffer_write_sync;
+
+		if (do_barriers && dev->barriers) {
+			ret = submit_bh(WRITE_BARRIER, bh);
+			if (ret == -EOPNOTSUPP) {
+				printk("btrfs: disabling barriers on dev %s\n",
+				       dev->name);
+				set_buffer_uptodate(bh);
+				dev->barriers = 0;
+				get_bh(bh);
+				lock_buffer(bh);
+				ret = submit_bh(WRITE, bh);
+			}
+		} else {
+			ret = submit_bh(WRITE, bh);
+		}
+		if (ret)
+			total_errors++;
+	}
+	if (total_errors > max_errors) {
+		printk("btrfs: %d errors while writing supers\n", total_errors);
+		BUG();
+	}
+	total_errors = 0;
+
+	list_for_each(cur, head) {
+		dev = list_entry(cur, struct btrfs_device, dev_list);
+		if (!dev->bdev)
+			continue;
+		if (!dev->in_fs_metadata)
+			continue;
+
+		BUG_ON(!dev->pending_io);
+		bh = dev->pending_io;
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(dev->pending_io)) {
+			if (do_barriers && dev->barriers) {
+				printk("btrfs: disabling barriers on dev %s\n",
+				       dev->name);
+				set_buffer_uptodate(bh);
+				get_bh(bh);
+				lock_buffer(bh);
+				dev->barriers = 0;
+				ret = submit_bh(WRITE, bh);
+				BUG_ON(ret);
+				wait_on_buffer(bh);
+				if (!buffer_uptodate(bh))
+					total_errors++;
+			} else {
+				total_errors++;
+			}
+
+		}
+		dev->pending_io = NULL;
+		brelse(bh);
+	}
+	if (total_errors > max_errors) {
+		printk("btrfs: %d errors while writing supers\n", total_errors);
+		BUG();
+	}
+	return 0;
+}
+
+int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root)
+{
+	int ret;
+
+	ret = write_all_supers(root);
+	return ret;
+}
+
+int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
+{
+	radix_tree_delete(&fs_info->fs_roots_radix,
+			  (unsigned long)root->root_key.objectid);
+	if (root->in_sysfs)
+		btrfs_sysfs_del_root(root);
+	if (root->inode)
+		iput(root->inode);
+	if (root->node)
+		free_extent_buffer(root->node);
+	if (root->commit_root)
+		free_extent_buffer(root->commit_root);
+	if (root->name)
+		kfree(root->name);
+	kfree(root);
+	return 0;
+}
+
+static int del_fs_roots(struct btrfs_fs_info *fs_info)
+{
+	int ret;
+	struct btrfs_root *gang[8];
+	int i;
+
+	while(1) {
+		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+					     (void **)gang, 0,
+					     ARRAY_SIZE(gang));
+		if (!ret)
+			break;
+		for (i = 0; i < ret; i++)
+			btrfs_free_fs_root(fs_info, gang[i]);
+	}
+	return 0;
+}
+
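+/*
+ * close_ctree tears everything down at unmount time: it stops the helper
+ * kthreads, commits the final transactions, writes the super blocks and
+ * then frees the cached roots, block groups and worker threads.
+ */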
+int close_ctree(struct btrfs_root *root)
+{
+	int ret;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	fs_info->closing = 1;
+	smp_mb();
+
+	kthread_stop(root->fs_info->transaction_kthread);
+	kthread_stop(root->fs_info->cleaner_kthread);
+
+	btrfs_clean_old_snapshots(root);
+	trans = btrfs_start_transaction(root, 1);
+	ret = btrfs_commit_transaction(trans, root);
+	/* run commit again to  drop the original snapshot */
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_commit_transaction(trans, root);
+	ret = btrfs_write_and_wait_transaction(NULL, root);
+	BUG_ON(ret);
+
+	write_ctree_super(NULL, root);
+
+	if (fs_info->delalloc_bytes) {
+		printk("btrfs: at unmount delalloc count %Lu\n",
+		       fs_info->delalloc_bytes);
+	}
+	if (fs_info->total_ref_cache_size) {
+		printk("btrfs: at umount reference cache size %Lu\n",
+			fs_info->total_ref_cache_size);
+	}
+
+	if (fs_info->extent_root->node)
+		free_extent_buffer(fs_info->extent_root->node);
+
+	if (fs_info->tree_root->node)
+		free_extent_buffer(fs_info->tree_root->node);
+
+	if (root->fs_info->chunk_root->node)
+		free_extent_buffer(root->fs_info->chunk_root->node);
+
+	if (root->fs_info->dev_root->node)
+		free_extent_buffer(root->fs_info->dev_root->node);
+
+	btrfs_free_block_groups(root->fs_info);
+	fs_info->closing = 2;
+	del_fs_roots(fs_info);
+
+	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
+
+	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
+
+	btrfs_stop_workers(&fs_info->fixup_workers);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
+	btrfs_stop_workers(&fs_info->endio_write_workers);
+	btrfs_stop_workers(&fs_info->submit_workers);
+
+	iput(fs_info->btree_inode);
+#if 0
+	while(!list_empty(&fs_info->hashers)) {
+		struct btrfs_hasher *hasher;
+		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
+				    hashers);
+		list_del(&hasher->hashers);
+		crypto_free_hash(&fs_info->hash_tfm);
+		kfree(hasher);
+	}
+#endif
+	btrfs_close_devices(fs_info->fs_devices);
+	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
+	bdi_destroy(&fs_info->bdi);
+
+	kfree(fs_info->extent_root);
+	kfree(fs_info->tree_root);
+	kfree(fs_info->chunk_root);
+	kfree(fs_info->dev_root);
+	return 0;
+}
+
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+{
+	int ret;
+	struct inode *btree_inode = buf->first_page->mapping->host;
+
+	ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
+	if (!ret)
+		return ret;
+
+	ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
+				    parent_transid);
+	return !ret;
+}
+
+int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
+{
+	struct inode *btree_inode = buf->first_page->mapping->host;
+	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
+					  buf);
+}
+
+void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
+{
+	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
+	u64 transid = btrfs_header_generation(buf);
+	struct inode *btree_inode = root->fs_info->btree_inode;
+
+	WARN_ON(!btrfs_tree_locked(buf));
+	if (transid != root->fs_info->generation) {
+		printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
+			(unsigned long long)buf->start,
+			transid, root->fs_info->generation);
+		WARN_ON(1);
+	}
+	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
+}
+
+void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+{
+	/*
+	 * looks as though older kernels can get into trouble with
+	 * this code, they end up stuck in balance_dirty_pages forever
+	 */
+	struct extent_io_tree *tree;
+	u64 num_dirty;
+	u64 start = 0;
+	unsigned long thresh = 96 * 1024 * 1024;
+	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
+
+	if (current_is_pdflush() || current->flags & PF_MEMALLOC)
+		return;
+
+	num_dirty = count_range_bits(tree, &start, (u64)-1,
+				     thresh, EXTENT_DIRTY);
+	if (num_dirty > thresh) {
+		balance_dirty_pages_ratelimited_nr(
+				   root->fs_info->btree_inode->i_mapping, 1);
+	}
+	return;
+}
+
+int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
+{
+	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
+	int ret;
+	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
+	if (ret == 0) {
+		buf->flags |= EXTENT_UPTODATE;
+	}
+	return ret;
+}
+
+int btree_lock_page_hook(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct extent_buffer *eb;
+	unsigned long len;
+	u64 bytenr = page_offset(page);
+
+	if (page->private == EXTENT_PAGE_PRIVATE)
+		goto out;
+
+	len = page->private >> 2;
+	eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS);
+	if (!eb)
+		goto out;
+
+	btrfs_tree_lock(eb);
+	spin_lock(&root->fs_info->hash_lock);
+	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+	spin_unlock(&root->fs_info->hash_lock);
+	btrfs_tree_unlock(eb);
+	free_extent_buffer(eb);
+out:
+	lock_page(page);
+	return 0;
+}
+
+static struct extent_io_ops btree_extent_io_ops = {
+	.write_cache_pages_lock_hook = btree_lock_page_hook,
+	.readpage_end_io_hook = btree_readpage_end_io_hook,
+	.submit_bio_hook = btree_submit_bio_hook,
+	/* note we're sharing with inode.c for the merge bio hook */
+	.merge_bio_hook = btrfs_merge_bio_hook,
+};
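
An editorial aside, not part of the diff: the super block write loop above seeds a
crc32c with ~0, checksums the 4K super block from just past the csum field to the
end, and stores the result at the front via btrfs_csum_final().  A minimal
user-space sketch of that layout, assuming a plain bitwise crc32c() helper (the
kernel uses btrfs_csum_data()/btrfs_csum_final(), which also handle the final
fold and byte order):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define BTRFS_CSUM_SIZE       32
#define BTRFS_SUPER_INFO_SIZE 4096

/* straightforward bitwise CRC-32C (Castagnoli), reflected polynomial */
static uint32_t crc32c(uint32_t crc, const void *data, size_t len)
{
	const uint8_t *p = data;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

static void csum_super(uint8_t sb[BTRFS_SUPER_INFO_SIZE])
{
	/* seed with ~0 and checksum everything after the csum field itself */
	uint32_t crc = crc32c(~(uint32_t)0, sb + BTRFS_CSUM_SIZE,
			      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);

	/* the real code finishes the value in btrfs_csum_final() before
	 * storing it in the first BTRFS_CSUM_SIZE bytes; simplified here */
	memcpy(sb, &crc, sizeof(crc));
}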

+ 84 - 0
fs/btrfs/disk-io.h

@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __DISKIO__
+#define __DISKIO__
+
+#define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
+#define BTRFS_SUPER_INFO_SIZE 4096
+struct btrfs_device;
+struct btrfs_fs_devices;
+
+struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
+				      u32 blocksize, u64 parent_transid);
+int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+			 u64 parent_transid);
+struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
+						   u64 bytenr, u32 blocksize);
+int clean_tree_block(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root, struct extent_buffer *buf);
+struct btrfs_root *open_ctree(struct super_block *sb,
+			      struct btrfs_fs_devices *fs_devices,
+			      char *options);
+int close_ctree(struct btrfs_root *root);
+int write_ctree_super(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root);
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+					    u64 bytenr, u32 blocksize);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_objectid);
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+				      struct btrfs_key *location,
+				      const char *name, int namelen);
+struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
+					       struct btrfs_key *location);
+struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
+					      struct btrfs_key *location);
+int btrfs_insert_dev_radix(struct btrfs_root *root,
+			   struct block_device *bdev,
+			   u64 device_id,
+			   u64 block_start,
+			   u64 num_blocks);
+void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
+int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
+void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
+int wait_on_tree_block_writeback(struct btrfs_root *root,
+				 struct extent_buffer *buf);
+int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
+u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
+void btrfs_csum_final(u32 crc, char *result);
+int btrfs_open_device(struct btrfs_device *dev);
+int btrfs_verify_block_csum(struct btrfs_root *root,
+			    struct extent_buffer *buf);
+int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+			int metadata);
+int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
+			int rw, struct bio *bio, int mirror_num,
+			extent_submit_bio_hook_t *submit_bio_hook);
+int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
+unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
+int btrfs_write_tree_block(struct extent_buffer *buf);
+int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
+int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info);
+int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info);
+int btree_lock_page_hook(struct page *page);
+#endif

+ 201 - 0
fs/btrfs/export.c

@@ -0,0 +1,201 @@
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "btrfs_inode.h"
+#include "print-tree.h"
+#include "export.h"
+#include "compat.h"
+
+#define BTRFS_FID_SIZE_NON_CONNECTABLE		(offsetof(struct btrfs_fid, parent_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE		(offsetof(struct btrfs_fid, parent_root_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE_ROOT		(sizeof(struct btrfs_fid)/4)
+
+static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+			   int connectable)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
+	struct inode *inode = dentry->d_inode;
+	int len = *max_len;
+	int type;
+
+	if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
+	    (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+		return 255;
+
+	len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
+	type = FILEID_BTRFS_WITHOUT_PARENT;
+
+	fid->objectid = BTRFS_I(inode)->location.objectid;
+	fid->root_objectid = BTRFS_I(inode)->root->objectid;
+	fid->gen = inode->i_generation;
+
+	if (connectable && !S_ISDIR(inode->i_mode)) {
+		struct inode *parent;
+		u64 parent_root_id;
+
+		spin_lock(&dentry->d_lock);
+
+		parent = dentry->d_parent->d_inode;
+		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
+		fid->parent_gen = parent->i_generation;
+		parent_root_id = BTRFS_I(parent)->root->objectid;
+
+		spin_unlock(&dentry->d_lock);
+
+		if (parent_root_id != fid->root_objectid) {
+			fid->parent_root_objectid = parent_root_id;
+			len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
+			type = FILEID_BTRFS_WITH_PARENT_ROOT;
+		} else {
+			len = BTRFS_FID_SIZE_CONNECTABLE;
+			type = FILEID_BTRFS_WITH_PARENT;
+		}
+	}
+
+	*max_len = len;
+	return type;
+}
+
+static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+				       u64 root_objectid, u32 generation)
+{
+	struct btrfs_root *root;
+	struct inode *inode;
+	struct btrfs_key key;
+
+	key.objectid = root_objectid;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	key.offset = (u64)-1;
+
+	root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
+	if (IS_ERR(root))
+		return ERR_CAST(root);
+
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	inode = btrfs_iget(sb, &key, root, NULL);
+	if (IS_ERR(inode))
+		return (void *)inode;
+
+	if (generation != inode->i_generation) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	return d_obtain_alias(inode);
+}
+
+static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
+					 int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+		if (fh_len !=  BTRFS_FID_SIZE_CONNECTABLE)
+			return NULL;
+		root_objectid = fid->root_objectid;
+	} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
+		if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+			return NULL;
+		root_objectid = fid->parent_root_objectid;
+	} else
+		return NULL;
+
+	objectid = fid->parent_objectid;
+	generation = fid->parent_gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
+					 int fh_len, int fh_type)
+{
+	struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+	u64 objectid, root_objectid;
+	u32 generation;
+
+	if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
+	     fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+	    (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
+	     fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
+	    (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
+	     fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
+		return NULL;
+
+	objectid = fid->objectid;
+	root_objectid = fid->root_objectid;
+	generation = fid->gen;
+
+	return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_get_parent(struct dentry *child)
+{
+	struct inode *dir = child->d_inode;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	int slot;
+	u64 objectid;
+	int ret;
+
+	path = btrfs_alloc_path();
+
+	key.objectid = dir->i_ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		/* Error */
+		btrfs_free_path(path);
+		return ERR_PTR(ret);
+	}
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	if (ret) {
+		/* btrfs_search_slot() returns the slot where we'd want to
+		   insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
+		   The _real_ backref, telling us what the parent inode
+		   _actually_ is, will be in the slot _before_ the one
+		   that btrfs_search_slot() returns. */
+		if (!slot) {
+			/* Unless there is _no_ key in the tree before... */
+			btrfs_free_path(path);
+			return ERR_PTR(-EIO);
+		}
+		slot--;
+	}
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	btrfs_free_path(path);
+
+	if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
+		return ERR_PTR(-EINVAL);
+
+	objectid = key.offset;
+
+	/* If we are already at the root of a subvol, return the real root */
+	if (objectid == dir->i_ino)
+		return dget(dir->i_sb->s_root);
+
+	/* Build a new key for the inode item */
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
+}
+
+const struct export_operations btrfs_export_ops = {
+	.encode_fh	= btrfs_encode_fh,
+	.fh_to_dentry	= btrfs_fh_to_dentry,
+	.fh_to_parent	= btrfs_fh_to_parent,
+	.get_parent	= btrfs_get_parent,
+};

+ 19 - 0
fs/btrfs/export.h

@@ -0,0 +1,19 @@
+#ifndef BTRFS_EXPORT_H
+#define BTRFS_EXPORT_H
+
+#include <linux/exportfs.h>
+
+extern const struct export_operations btrfs_export_ops;
+
+struct btrfs_fid {
+	u64 objectid;
+	u64 root_objectid;
+	u32 gen;
+
+	u64 parent_objectid;
+	u32 parent_gen;
+
+	u64 parent_root_objectid;
+} __attribute__ ((packed));
+
+#endif
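
An editorial aside, not part of the diff: exportfs measures file handle lengths in
32-bit words, which is why the BTRFS_FID_SIZE_* macros in export.c divide by 4.
Because struct btrfs_fid is packed, the three sizes come out to 5, 8 and 10 words.
A small stand-alone sketch (user-space; only the struct layout is taken from the
header, the rest is illustrative) that reproduces the arithmetic:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct btrfs_fid {
	uint64_t objectid;
	uint64_t root_objectid;
	uint32_t gen;

	uint64_t parent_objectid;
	uint32_t parent_gen;

	uint64_t parent_root_objectid;
} __attribute__ ((packed));

int main(void)
{
	/* parent_objectid sits at byte 20, parent_root_objectid at 32, sizeof is 40 */
	printf("non-connectable:        %zu words\n",
	       offsetof(struct btrfs_fid, parent_objectid) / 4);      /* 5  */
	printf("connectable:            %zu words\n",
	       offsetof(struct btrfs_fid, parent_root_objectid) / 4); /* 8  */
	printf("connectable with root:  %zu words\n",
	       sizeof(struct btrfs_fid) / 4);                         /* 10 */
	return 0;
}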

+ 5253 - 0
fs/btrfs/extent-tree.c

@@ -0,0 +1,5253 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include "hash.h"
+#include "crc32c.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "volumes.h"
+#include "locking.h"
+#include "ref-cache.h"
+
+#define PENDING_EXTENT_INSERT 0
+#define PENDING_EXTENT_DELETE 1
+#define PENDING_BACKREF_UPDATE 2
+
+struct pending_extent_op {
+	int type;
+	u64 bytenr;
+	u64 num_bytes;
+	u64 parent;
+	u64 orig_parent;
+	u64 generation;
+	u64 orig_generation;
+	int level;
+};
+
+static int finish_current_insert(struct btrfs_trans_handle *trans, struct
+				 btrfs_root *extent_root);
+static int del_pending_extents(struct btrfs_trans_handle *trans, struct
+			       btrfs_root *extent_root);
+static struct btrfs_block_group_cache *
+__btrfs_find_block_group(struct btrfs_root *root,
+			 struct btrfs_block_group_cache *hint,
+			 u64 search_start, int data, int owner);
+
+void maybe_lock_mutex(struct btrfs_root *root)
+{
+	if (root != root->fs_info->extent_root &&
+	    root != root->fs_info->chunk_root &&
+	    root != root->fs_info->dev_root) {
+		mutex_lock(&root->fs_info->alloc_mutex);
+	}
+}
+
+void maybe_unlock_mutex(struct btrfs_root *root)
+{
+	if (root != root->fs_info->extent_root &&
+	    root != root->fs_info->chunk_root &&
+	    root != root->fs_info->dev_root) {
+		mutex_unlock(&root->fs_info->alloc_mutex);
+	}
+}
+
+static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+{
+	return (cache->flags & bits) == bits;
+}
+
+/*
+ * this adds the block group to the fs_info rb tree for the block group
+ * cache
+ */
+int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
+				struct btrfs_block_group_cache *block_group)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct btrfs_block_group_cache *cache;
+
+	spin_lock(&info->block_group_cache_lock);
+	p = &info->block_group_cache_tree.rb_node;
+
+	while (*p) {
+		parent = *p;
+		cache = rb_entry(parent, struct btrfs_block_group_cache,
+				 cache_node);
+		if (block_group->key.objectid < cache->key.objectid) {
+			p = &(*p)->rb_left;
+		} else if (block_group->key.objectid > cache->key.objectid) {
+			p = &(*p)->rb_right;
+		} else {
+			spin_unlock(&info->block_group_cache_lock);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&block_group->cache_node, parent, p);
+	rb_insert_color(&block_group->cache_node,
+			&info->block_group_cache_tree);
+	spin_unlock(&info->block_group_cache_lock);
+
+	return 0;
+}
+
+/*
+ * This will return the block group at or after bytenr if contains is 0, else
+ * it will return the block group that contains the bytenr
+ */
+static struct btrfs_block_group_cache *
+block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
+			      int contains)
+{
+	struct btrfs_block_group_cache *cache, *ret = NULL;
+	struct rb_node *n;
+	u64 end, start;
+
+	spin_lock(&info->block_group_cache_lock);
+	n = info->block_group_cache_tree.rb_node;
+
+	while (n) {
+		cache = rb_entry(n, struct btrfs_block_group_cache,
+				 cache_node);
+		end = cache->key.objectid + cache->key.offset - 1;
+		start = cache->key.objectid;
+
+		if (bytenr < start) {
+			if (!contains && (!ret || start < ret->key.objectid))
+				ret = cache;
+			n = n->rb_left;
+		} else if (bytenr > start) {
+			if (contains && bytenr <= end) {
+				ret = cache;
+				break;
+			}
+			n = n->rb_right;
+		} else {
+			ret = cache;
+			break;
+		}
+	}
+	spin_unlock(&info->block_group_cache_lock);
+
+	return ret;
+}
+
+/*
+ * this is only called by cache_block_group.  Since we could have freed extents,
+ * we need to check the pinned_extents for any extents that can't be used yet,
+ * because their free space won't be released until the transaction commits.
+ */
+static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+			      struct btrfs_fs_info *info, u64 start, u64 end)
+{
+	u64 extent_start, extent_end, size;
+	int ret;
+
+	while (start < end) {
+		ret = find_first_extent_bit(&info->pinned_extents, start,
+					    &extent_start, &extent_end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		if (extent_start == start) {
+			start = extent_end + 1;
+		} else if (extent_start > start && extent_start < end) {
+			size = extent_start - start;
+			ret = btrfs_add_free_space(block_group, start, size);
+			BUG_ON(ret);
+			start = extent_end + 1;
+		} else {
+			break;
+		}
+	}
+
+	if (start < end) {
+		size = end - start;
+		ret = btrfs_add_free_space(block_group, start, size);
+		BUG_ON(ret);
+	}
+
+	return 0;
+}
+
+static int cache_block_group(struct btrfs_root *root,
+			     struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_path *path;
+	int ret = 0;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	int slot;
+	u64 last = 0;
+	u64 first_free;
+	int found = 0;
+
+	if (!block_group)
+		return 0;
+
+	root = root->fs_info->extent_root;
+
+	if (block_group->cached)
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->reada = 2;
+	/*
+	 * we get into deadlocks with paths held by callers of this function.
+	 * since the alloc_mutex is protecting things right now, just
+	 * skip the locking here
+	 */
+	path->skip_locking = 1;
+	first_free = max_t(u64, block_group->key.objectid,
+			   BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
+	key.objectid = block_group->key.objectid;
+	key.offset = 0;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto err;
+	ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
+	if (ret < 0)
+		goto err;
+	if (ret == 0) {
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid + key.offset > first_free)
+			first_free = key.objectid + key.offset;
+	}
+	while(1) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto err;
+			if (ret == 0)
+				continue;
+			else
+				break;
+		}
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid < block_group->key.objectid)
+			goto next;
+
+		if (key.objectid >= block_group->key.objectid +
+		    block_group->key.offset)
+			break;
+
+		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+			if (!found) {
+				last = first_free;
+				found = 1;
+			}
+
+			add_new_free_space(block_group, root->fs_info, last,
+					   key.objectid);
+
+			last = key.objectid + key.offset;
+		}
+next:
+		path->slots[0]++;
+	}
+
+	if (!found)
+		last = first_free;
+
+	add_new_free_space(block_group, root->fs_info, last,
+			   block_group->key.objectid +
+			   block_group->key.offset);
+
+	block_group->cached = 1;
+	ret = 0;
+err:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * return the block group that starts at or after bytenr
+ */
+struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
+						       btrfs_fs_info *info,
+							 u64 bytenr)
+{
+	struct btrfs_block_group_cache *cache;
+
+	cache = block_group_cache_tree_search(info, bytenr, 0);
+
+	return cache;
+}
+
+/*
+ * return the block group that contains the given bytenr
+ */
+struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
+							 btrfs_fs_info *info,
+							 u64 bytenr)
+{
+	struct btrfs_block_group_cache *cache;
+
+	cache = block_group_cache_tree_search(info, bytenr, 1);
+
+	return cache;
+}
+
+static int noinline find_free_space(struct btrfs_root *root,
+				    struct btrfs_block_group_cache **cache_ret,
+				    u64 *start_ret, u64 num, int data)
+{
+	int ret;
+	struct btrfs_block_group_cache *cache = *cache_ret;
+	struct btrfs_free_space *info = NULL;
+	u64 last;
+	u64 search_start = *start_ret;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	if (!cache)
+		goto out;
+
+	last = max(search_start, cache->key.objectid);
+
+again:
+	ret = cache_block_group(root, cache);
+	if (ret)
+		goto out;
+
+	if (cache->ro || !block_group_bits(cache, data))
+		goto new_group;
+
+	info = btrfs_find_free_space(cache, last, num);
+	if (info) {
+		*start_ret = info->offset;
+		return 0;
+	}
+
+new_group:
+	last = cache->key.objectid + cache->key.offset;
+
+	cache = btrfs_lookup_first_block_group(root->fs_info, last);
+	if (!cache)
+		goto out;
+
+	*cache_ret = cache;
+	goto again;
+
+out:
+	return -ENOSPC;
+}
+
+static u64 div_factor(u64 num, int factor)
+{
+	if (factor == 10)
+		return num;
+	num *= factor;
+	do_div(num, 10);
+	return num;
+}
+
+static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+						  u64 flags)
+{
+	struct list_head *head = &info->space_info;
+	struct list_head *cur;
+	struct btrfs_space_info *found;
+	list_for_each(cur, head) {
+		found = list_entry(cur, struct btrfs_space_info, list);
+		if (found->flags == flags)
+			return found;
+	}
+	return NULL;
+}
+
+static struct btrfs_block_group_cache *
+__btrfs_find_block_group(struct btrfs_root *root,
+			 struct btrfs_block_group_cache *hint,
+			 u64 search_start, int data, int owner)
+{
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_block_group_cache *found_group = NULL;
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 used;
+	u64 last = 0;
+	u64 free_check;
+	int full_search = 0;
+	int factor = 10;
+	int wrapped = 0;
+
+	if (data & BTRFS_BLOCK_GROUP_METADATA)
+		factor = 9;
+
+	if (search_start) {
+		struct btrfs_block_group_cache *shint;
+		shint = btrfs_lookup_first_block_group(info, search_start);
+		if (shint && block_group_bits(shint, data) && !shint->ro) {
+			spin_lock(&shint->lock);
+			used = btrfs_block_group_used(&shint->item);
+			if (used + shint->pinned + shint->reserved <
+			    div_factor(shint->key.offset, factor)) {
+				spin_unlock(&shint->lock);
+				return shint;
+			}
+			spin_unlock(&shint->lock);
+		}
+	}
+	if (hint && !hint->ro && block_group_bits(hint, data)) {
+		spin_lock(&hint->lock);
+		used = btrfs_block_group_used(&hint->item);
+		if (used + hint->pinned + hint->reserved <
+		    div_factor(hint->key.offset, factor)) {
+			spin_unlock(&hint->lock);
+			return hint;
+		}
+		spin_unlock(&hint->lock);
+		last = hint->key.objectid + hint->key.offset;
+	} else {
+		if (hint)
+			last = max(hint->key.objectid, search_start);
+		else
+			last = search_start;
+	}
+again:
+	while (1) {
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		if (!cache)
+			break;
+
+		spin_lock(&cache->lock);
+		last = cache->key.objectid + cache->key.offset;
+		used = btrfs_block_group_used(&cache->item);
+
+		if (!cache->ro && block_group_bits(cache, data)) {
+			free_check = div_factor(cache->key.offset, factor);
+			if (used + cache->pinned + cache->reserved <
+			    free_check) {
+				found_group = cache;
+				spin_unlock(&cache->lock);
+				goto found;
+			}
+		}
+		spin_unlock(&cache->lock);
+		cond_resched();
+	}
+	if (!wrapped) {
+		last = search_start;
+		wrapped = 1;
+		goto again;
+	}
+	if (!full_search && factor < 10) {
+		last = search_start;
+		full_search = 1;
+		factor = 10;
+		goto again;
+	}
+found:
+	return found_group;
+}
+
+struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
+						 struct btrfs_block_group_cache
+						 *hint, u64 search_start,
+						 int data, int owner)
+{
+
+	struct btrfs_block_group_cache *ret;
+	ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
+	return ret;
+}
+
+/* simple helper to search for an existing extent at a given offset */
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	maybe_lock_mutex(root);
+	key.objectid = start;
+	key.offset = len;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
+				0, 0);
+	maybe_unlock_mutex(root);
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Back reference rules.  Back refs have three main goals:
+ *
+ * 1) differentiate between all holders of references to an extent so that
+ *    when a reference is dropped we can make sure it was a valid reference
+ *    before freeing the extent.
+ *
+ * 2) Provide enough information to quickly find the holders of an extent
+ *    if we notice a given block is corrupted or bad.
+ *
+ * 3) Make it easy to migrate blocks for FS shrinking or storage pool
+ *    maintenance.  This is actually the same as #2, but with a slightly
+ *    different use case.
+ *
+ * File extents can be referenced by:
+ *
+ * - multiple snapshots, subvolumes, or different generations in one subvol
+ * - different files inside a single subvolume
+ * - different offsets inside a file (bookend extents in file.c)
+ *
+ * The extent ref structure has fields for:
+ *
+ * - Objectid of the subvolume root
+ * - Generation number of the tree holding the reference
+ * - objectid of the file holding the reference
+ * - number of references held by the parent node (always 1 for tree blocks)
+ *
+ * A btree leaf may hold multiple references to a file extent. In most cases,
+ * these references are from the same file and the corresponding offsets inside
+ * the file are close together.
+ *
+ * When a file extent is allocated the fields are filled in:
+ *     (root_key.objectid, trans->transid, inode objectid, 1)
+ *
+ * When a leaf is cow'd, new references are added for every file extent found
+ * in the leaf.  It looks similar to the create case, but trans->transid will
+ * be different when the block is cow'd.
+ *
+ *     (root_key.objectid, trans->transid, inode objectid,
+ *      number of references in the leaf)
+ *
+ * When a file extent is removed either during snapshot deletion or
+ * file truncation, we find the corresponding back reference and check
+ * the following fields:
+ *
+ *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
+ *      inode objectid)
+ *
+ * Btree extents can be referenced by:
+ *
+ * - Different subvolumes
+ * - Different generations of the same subvolume
+ *
+ * When a tree block is created, back references are inserted:
+ *
+ * (root->root_key.objectid, trans->transid, level, 1)
+ *
+ * When a tree block is cow'd, new back references are added for all the
+ * blocks it points to. If the tree block isn't in a reference counted root,
+ * the old back references are removed. These new back references are of
+ * the form (trans->transid will have increased since creation):
+ *
+ * (root->root_key.objectid, trans->transid, level, 1)
+ *
+ * When a backref is being deleted, the following fields are checked:
+ *
+ * if backref was for a tree root:
+ *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
+ * else
+ *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
+ *
+ * Back Reference Key composing:
+ *
+ * The key objectid corresponds to the first byte in the extent, the key
+ * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
+ * byte of the parent extent. If an extent is a tree root, the key offset is set
+ * to the key objectid.
+ */
+
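/*
 * Editorial sketch, not part of this commit: the back reference key
 * composition described above, shown in isolation.  It is the same key
 * that lookup_extent_backref() below builds before searching the
 * extent tree.
 */
static inline void compose_backref_key_example(struct btrfs_key *key,
						u64 bytenr, u64 parent)
{
	key->objectid = bytenr;		/* first byte of the extent */
	key->type = BTRFS_EXTENT_REF_KEY;
	key->offset = parent;		/* first byte of the parent block,
					 * or bytenr itself for a tree root */
}
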
+static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 ref_root, u64 ref_generation,
+					  u64 owner_objectid, int del)
+{
+	struct btrfs_key key;
+	struct btrfs_extent_ref *ref;
+	struct extent_buffer *leaf;
+	u64 ref_objectid;
+	int ret;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_REF_KEY;
+	key.offset = parent;
+
+	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
+	ref_objectid = btrfs_ref_objectid(leaf, ref);
+	if (btrfs_ref_root(leaf, ref) != ref_root ||
+	    btrfs_ref_generation(leaf, ref) != ref_generation ||
+	    (ref_objectid != owner_objectid &&
+	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
+		ret = -EIO;
+		WARN_ON(1);
+		goto out;
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 ref_root, u64 ref_generation,
+					  u64 owner_objectid)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_ref *ref;
+	u32 num_refs;
+	int ret;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_REF_KEY;
+	key.offset = parent;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
+	if (ret == 0) {
+		leaf = path->nodes[0];
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_ref);
+		btrfs_set_ref_root(leaf, ref, ref_root);
+		btrfs_set_ref_generation(leaf, ref, ref_generation);
+		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
+		btrfs_set_ref_num_refs(leaf, ref, 1);
+	} else if (ret == -EEXIST) {
+		u64 existing_owner;
+		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
+		leaf = path->nodes[0];
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_ref);
+		if (btrfs_ref_root(leaf, ref) != ref_root ||
+		    btrfs_ref_generation(leaf, ref) != ref_generation) {
+			ret = -EIO;
+			WARN_ON(1);
+			goto out;
+		}
+
+		num_refs = btrfs_ref_num_refs(leaf, ref);
+		BUG_ON(num_refs == 0);
+		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
+
+		existing_owner = btrfs_ref_objectid(leaf, ref);
+		if (existing_owner != owner_objectid &&
+		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
+			btrfs_set_ref_objectid(leaf, ref,
+					BTRFS_MULTIPLE_OBJECTIDS);
+		}
+		ret = 0;
+	} else {
+		goto out;
+	}
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_extent_ref *ref;
+	u32 num_refs;
+	int ret = 0;
+
+	leaf = path->nodes[0];
+	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
+	num_refs = btrfs_ref_num_refs(leaf, ref);
+	BUG_ON(num_refs == 0);
+	num_refs -= 1;
+	if (num_refs == 0) {
+		ret = btrfs_del_item(trans, root, path);
+	} else {
+		btrfs_set_ref_num_refs(leaf, ref, num_refs);
+		btrfs_mark_buffer_dirty(leaf);
+	}
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root, u64 bytenr,
+				     u64 orig_parent, u64 parent,
+				     u64 orig_root, u64 ref_root,
+				     u64 orig_generation, u64 ref_generation,
+				     u64 owner_objectid)
+{
+	int ret;
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct btrfs_path *path;
+
+	if (root == root->fs_info->extent_root) {
+		struct pending_extent_op *extent_op;
+		u64 num_bytes;
+
+		BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
+		num_bytes = btrfs_level_size(root, (int)owner_objectid);
+		if (test_range_bit(&root->fs_info->extent_ins, bytenr,
+				bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
+			u64 priv;
+			ret = get_state_private(&root->fs_info->extent_ins,
+						bytenr, &priv);
+			BUG_ON(ret);
+			extent_op = (struct pending_extent_op *)
+							(unsigned long)priv;
+			BUG_ON(extent_op->parent != orig_parent);
+			BUG_ON(extent_op->generation != orig_generation);
+			extent_op->parent = parent;
+			extent_op->generation = ref_generation;
+		} else {
+			extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+			BUG_ON(!extent_op);
+
+			extent_op->type = PENDING_BACKREF_UPDATE;
+			extent_op->bytenr = bytenr;
+			extent_op->num_bytes = num_bytes;
+			extent_op->parent = parent;
+			extent_op->orig_parent = orig_parent;
+			extent_op->generation = ref_generation;
+			extent_op->orig_generation = orig_generation;
+			extent_op->level = (int)owner_objectid;
+
+			set_extent_bits(&root->fs_info->extent_ins,
+					bytenr, bytenr + num_bytes - 1,
+					EXTENT_LOCKED, GFP_NOFS);
+			set_state_private(&root->fs_info->extent_ins,
+					  bytenr, (unsigned long)extent_op);
+		}
+		return 0;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	ret = lookup_extent_backref(trans, extent_root, path,
+				    bytenr, orig_parent, orig_root,
+				    orig_generation, owner_objectid, 1);
+	if (ret)
+		goto out;
+	ret = remove_extent_backref(trans, extent_root, path);
+	if (ret)
+		goto out;
+	ret = insert_extent_backref(trans, extent_root, path, bytenr,
+				    parent, ref_root, ref_generation,
+				    owner_objectid);
+	BUG_ON(ret);
+	finish_current_insert(trans, extent_root);
+	del_pending_extents(trans, extent_root);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, u64 bytenr,
+			    u64 orig_parent, u64 parent,
+			    u64 ref_root, u64 ref_generation,
+			    u64 owner_objectid)
+{
+	int ret;
+	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
+	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
+		return 0;
+	maybe_lock_mutex(root);
+	ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
+					parent, ref_root, ref_root,
+					ref_generation, ref_generation,
+					owner_objectid);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root, u64 bytenr,
+				  u64 orig_parent, u64 parent,
+				  u64 orig_root, u64 ref_root,
+				  u64 orig_generation, u64 ref_generation,
+				  u64 owner_objectid)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_key key;
+	struct extent_buffer *l;
+	struct btrfs_extent_item *item;
+	u32 refs;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->reada = 1;
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
+				0, 1);
+	if (ret < 0)
+		return ret;
+	BUG_ON(ret == 0 || path->slots[0] == 0);
+
+	path->slots[0]--;
+	l = path->nodes[0];
+
+	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+	BUG_ON(key.objectid != bytenr);
+	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
+
+	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(l, item);
+	btrfs_set_extent_refs(l, item, refs + 1);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+	btrfs_release_path(root->fs_info->extent_root, path);
+
+	path->reada = 1;
+	ret = insert_extent_backref(trans, root->fs_info->extent_root,
+				    path, bytenr, parent,
+				    ref_root, ref_generation,
+				    owner_objectid);
+	BUG_ON(ret);
+	finish_current_insert(trans, root->fs_info->extent_root);
+	del_pending_extents(trans, root->fs_info->extent_root);
+
+	btrfs_free_path(path);
+	return 0;
+}
+
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 bytenr, u64 num_bytes, u64 parent,
+			 u64 ref_root, u64 ref_generation,
+			 u64 owner_objectid)
+{
+	int ret;
+	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
+	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
+		return 0;
+	maybe_lock_mutex(root);
+	ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
+				     0, ref_root, 0, ref_generation,
+				     owner_objectid);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root)
+{
+	finish_current_insert(trans, root->fs_info->extent_root);
+	del_pending_extents(trans, root->fs_info->extent_root);
+	return 0;
+}
+
+int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, u64 bytenr,
+			    u64 num_bytes, u32 *refs)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_key key;
+	struct extent_buffer *l;
+	struct btrfs_extent_item *item;
+
+	WARN_ON(num_bytes < root->sectorsize);
+	path = btrfs_alloc_path();
+	path->reada = 1;
+	key.objectid = bytenr;
+	key.offset = num_bytes;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
+				0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret != 0) {
+		btrfs_print_leaf(root, path->nodes[0]);
+		printk("failed to find block number %Lu\n", bytenr);
+		BUG();
+	}
+	l = path->nodes[0];
+	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
+	*refs = btrfs_extent_refs(l, item);
+out:
+	btrfs_free_path(path);
+	return 0;
+}
+
+static int get_reference_status(struct btrfs_root *root, u64 bytenr,
+				u64 parent_gen, u64 ref_objectid,
+			        u64 *min_generation, u32 *ref_count)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_ref *ref_item;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	u64 root_objectid = root->root_key.objectid;
+	u64 ref_generation;
+	u32 nritems;
+	int ret;
+
+	key.objectid = bytenr;
+	key.offset = (u64)-1;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+
+	path = btrfs_alloc_path();
+	mutex_lock(&root->fs_info->alloc_mutex);
+	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret == 0);
+	if (ret < 0 || path->slots[0] == 0)
+		goto out;
+
+	path->slots[0]--;
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+	if (found_key.objectid != bytenr ||
+	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
+		ret = 1;
+		goto out;
+	}
+
+	*ref_count = 0;
+	*min_generation = (u64)-1;
+
+	while (1) {
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				goto out;
+			if (ret == 0)
+				continue;
+			break;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid != bytenr)
+			break;
+
+		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
+			path->slots[0]++;
+			continue;
+		}
+
+		ref_item = btrfs_item_ptr(leaf, path->slots[0],
+					  struct btrfs_extent_ref);
+		ref_generation = btrfs_ref_generation(leaf, ref_item);
+		/*
+		 * For (parent_gen > 0 && parent_gen > ref_generation):
+		 *
+		 * we reach here through the oldest root, therefore
+		 * all other reference from same snapshot should have
+		 * all other references from the same snapshot should have
+		 */
+		if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
+		    (parent_gen > 0 && parent_gen > ref_generation) ||
+		    (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+		     ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
+			*ref_count = 2;
+			break;
+		}
+
+		*ref_count = 1;
+		if (*min_generation > ref_generation)
+			*min_generation = ref_generation;
+
+		path->slots[0]++;
+	}
+	ret = 0;
+out:
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_key *key, u64 bytenr)
+{
+	struct btrfs_root *old_root;
+	struct btrfs_path *path = NULL;
+	struct extent_buffer *eb;
+	struct btrfs_file_extent_item *item;
+	u64 ref_generation;
+	u64 min_generation;
+	u64 extent_start;
+	u32 ref_count;
+	int level;
+	int ret;
+
+	BUG_ON(trans == NULL);
+	BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
+	ret = get_reference_status(root, bytenr, 0, key->objectid,
+				   &min_generation, &ref_count);
+	if (ret)
+		return ret;
+
+	if (ref_count != 1)
+		return 1;
+
+	old_root = root->dirty_root->root;
+	ref_generation = old_root->root_key.offset;
+
+	/* all references are created in running transaction */
+	if (min_generation > ref_generation) {
+		ret = 0;
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	path->skip_locking = 1;
+	/* if no item found, the extent is referenced by another snapshot */
+	ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
+	if (ret)
+		goto out;
+
+	eb = path->nodes[0];
+	item = btrfs_item_ptr(eb, path->slots[0],
+			      struct btrfs_file_extent_item);
+	if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
+	    btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
+		ret = 1;
+		goto out;
+	}
+
+	for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
+		if (level >= 0) {
+			eb = path->nodes[level];
+			if (!eb)
+				continue;
+			extent_start = eb->start;
+		} else
+			extent_start = bytenr;
+
+		ret = get_reference_status(root, extent_start, ref_generation,
+					   0, &min_generation, &ref_count);
+		if (ret)
+			goto out;
+
+		if (ref_count != 1) {
+			ret = 1;
+			goto out;
+		}
+		if (level >= 0)
+			ref_generation = btrfs_header_generation(eb);
+	}
+	ret = 0;
+out:
+	if (path)
+		btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		    struct extent_buffer *buf, u32 nr_extents)
+{
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	u64 root_gen;
+	u32 nritems;
+	int i;
+	int level;
+	int ret = 0;
+	int shared = 0;
+
+	if (!root->ref_cows)
+		return 0;
+
+	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+		shared = 0;
+		root_gen = root->root_key.offset;
+	} else {
+		shared = 1;
+		root_gen = trans->transid - 1;
+	}
+
+	level = btrfs_header_level(buf);
+	nritems = btrfs_header_nritems(buf);
+
+	if (level == 0) {
+		struct btrfs_leaf_ref *ref;
+		struct btrfs_extent_info *info;
+
+		ref = btrfs_alloc_leaf_ref(root, nr_extents);
+		if (!ref) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ref->root_gen = root_gen;
+		ref->bytenr = buf->start;
+		ref->owner = btrfs_header_owner(buf);
+		ref->generation = btrfs_header_generation(buf);
+		ref->nritems = nr_extents;
+		info = ref->extents;
+
+		for (i = 0; nr_extents > 0 && i < nritems; i++) {
+			u64 disk_bytenr;
+			btrfs_item_key_to_cpu(buf, &key, i);
+			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+				continue;
+			fi = btrfs_item_ptr(buf, i,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(buf, fi) ==
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
+			if (disk_bytenr == 0)
+				continue;
+
+			info->bytenr = disk_bytenr;
+			info->num_bytes =
+				btrfs_file_extent_disk_num_bytes(buf, fi);
+			info->objectid = key.objectid;
+			info->offset = key.offset;
+			info++;
+		}
+
+		ret = btrfs_add_leaf_ref(root, ref, shared);
+		if (ret == -EEXIST && shared) {
+			struct btrfs_leaf_ref *old;
+			old = btrfs_lookup_leaf_ref(root, ref->bytenr);
+			BUG_ON(!old);
+			btrfs_remove_leaf_ref(root, old);
+			btrfs_free_leaf_ref(root, old);
+			ret = btrfs_add_leaf_ref(root, ref, shared);
+		}
+		WARN_ON(ret);
+		btrfs_free_leaf_ref(root, ref);
+	}
+out:
+	return ret;
+}
+
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
+		  u32 *nr_extents)
+{
+	u64 bytenr;
+	u64 ref_root;
+	u64 orig_root;
+	u64 ref_generation;
+	u64 orig_generation;
+	u32 nritems;
+	u32 nr_file_extents = 0;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	int i;
+	int level;
+	int ret = 0;
+	int faili = 0;
+	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
+			    u64, u64, u64, u64, u64, u64, u64, u64);
+
+	ref_root = btrfs_header_owner(buf);
+	ref_generation = btrfs_header_generation(buf);
+	orig_root = btrfs_header_owner(orig_buf);
+	orig_generation = btrfs_header_generation(orig_buf);
+
+	nritems = btrfs_header_nritems(buf);
+	level = btrfs_header_level(buf);
+
+	if (root->ref_cows) {
+		process_func = __btrfs_inc_extent_ref;
+	} else {
+		if (level == 0 &&
+		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+			goto out;
+		if (level != 0 &&
+		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
+			goto out;
+		process_func = __btrfs_update_extent_ref;
+	}
+
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		if (level == 0) {
+			btrfs_item_key_to_cpu(buf, &key, i);
+			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+				continue;
+			fi = btrfs_item_ptr(buf, i,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(buf, fi) ==
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
+			if (bytenr == 0)
+				continue;
+
+			nr_file_extents++;
+
+			maybe_lock_mutex(root);
+			ret = process_func(trans, root, bytenr,
+					   orig_buf->start, buf->start,
+					   orig_root, ref_root,
+					   orig_generation, ref_generation,
+					   key.objectid);
+			maybe_unlock_mutex(root);
+
+			if (ret) {
+				faili = i;
+				WARN_ON(1);
+				goto fail;
+			}
+		} else {
+			bytenr = btrfs_node_blockptr(buf, i);
+			maybe_lock_mutex(root);
+			ret = process_func(trans, root, bytenr,
+					   orig_buf->start, buf->start,
+					   orig_root, ref_root,
+					   orig_generation, ref_generation,
+					   level - 1);
+			maybe_unlock_mutex(root);
+			if (ret) {
+				faili = i;
+				WARN_ON(1);
+				goto fail;
+			}
+		}
+	}
+out:
+	if (nr_extents) {
+		if (level == 0)
+			*nr_extents = nr_file_extents;
+		else
+			*nr_extents = nritems;
+	}
+	return 0;
+fail:
+	WARN_ON(1);
+	return ret;
+}
+
+int btrfs_update_ref(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root, struct extent_buffer *orig_buf,
+		     struct extent_buffer *buf, int start_slot, int nr)
+{
+	u64 bytenr;
+	u64 ref_root;
+	u64 orig_root;
+	u64 ref_generation;
+	u64 orig_generation;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	int i;
+	int ret;
+	int slot;
+	int level;
+
+	BUG_ON(start_slot < 0);
+	BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
+
+	ref_root = btrfs_header_owner(buf);
+	ref_generation = btrfs_header_generation(buf);
+	orig_root = btrfs_header_owner(orig_buf);
+	orig_generation = btrfs_header_generation(orig_buf);
+	level = btrfs_header_level(buf);
+
+	if (!root->ref_cows) {
+		if (level == 0 &&
+		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+			return 0;
+		if (level != 0 &&
+		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
+			return 0;
+	}
+
+	for (i = 0, slot = start_slot; i < nr; i++, slot++) {
+		cond_resched();
+		if (level == 0) {
+			btrfs_item_key_to_cpu(buf, &key, slot);
+			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+				continue;
+			fi = btrfs_item_ptr(buf, slot,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(buf, fi) ==
+			    BTRFS_FILE_EXTENT_INLINE)
+				continue;
+			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
+			if (bytenr == 0)
+				continue;
+			maybe_lock_mutex(root);
+			ret = __btrfs_update_extent_ref(trans, root, bytenr,
+					    orig_buf->start, buf->start,
+					    orig_root, ref_root,
+					    orig_generation, ref_generation,
+					    key.objectid);
+			maybe_unlock_mutex(root);
+			if (ret)
+				goto fail;
+		} else {
+			bytenr = btrfs_node_blockptr(buf, slot);
+			maybe_lock_mutex(root);
+			ret = __btrfs_update_extent_ref(trans, root, bytenr,
+					    orig_buf->start, buf->start,
+					    orig_root, ref_root,
+					    orig_generation, ref_generation,
+					    level - 1);
+			maybe_unlock_mutex(root);
+			if (ret)
+				goto fail;
+		}
+	}
+	return 0;
+fail:
+	WARN_ON(1);
+	return -1;
+}
+
+static int write_one_cache_group(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_block_group_cache *cache)
+{
+	int ret;
+	int pending_ret;
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	unsigned long bi;
+	struct extent_buffer *leaf;
+
+	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
+	if (ret < 0)
+		goto fail;
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
+	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(extent_root, path);
+fail:
+	finish_current_insert(trans, extent_root);
+	pending_ret = del_pending_extents(trans, extent_root);
+	if (ret)
+		return ret;
+	if (pending_ret)
+		return pending_ret;
+	return 0;
+
+}
+
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
+{
+	struct btrfs_block_group_cache *cache, *entry;
+	struct rb_node *n;
+	int err = 0;
+	int werr = 0;
+	struct btrfs_path *path;
+	u64 last = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+	while(1) {
+		cache = NULL;
+		spin_lock(&root->fs_info->block_group_cache_lock);
+		for (n = rb_first(&root->fs_info->block_group_cache_tree);
+		     n; n = rb_next(n)) {
+			entry = rb_entry(n, struct btrfs_block_group_cache,
+					 cache_node);
+			if (entry->dirty) {
+				cache = entry;
+				break;
+			}
+		}
+		spin_unlock(&root->fs_info->block_group_cache_lock);
+
+		if (!cache)
+			break;
+
+		cache->dirty = 0;
+		last += cache->key.offset;
+
+		err = write_one_cache_group(trans, root,
+					    path, cache);
+		/*
+		 * if we fail to write the cache group, we want
+		 * to keep it marked dirty in hopes that a later
+		 * write will work
+		 */
+		if (err) {
+			werr = err;
+			continue;
+		}
+	}
+	btrfs_free_path(path);
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	return werr;
+}
+
+static int update_space_info(struct btrfs_fs_info *info, u64 flags,
+			     u64 total_bytes, u64 bytes_used,
+			     struct btrfs_space_info **space_info)
+{
+	struct btrfs_space_info *found;
+
+	found = __find_space_info(info, flags);
+	if (found) {
+		found->total_bytes += total_bytes;
+		found->bytes_used += bytes_used;
+		found->full = 0;
+		*space_info = found;
+		return 0;
+	}
+	found = kmalloc(sizeof(*found), GFP_NOFS);
+	if (!found)
+		return -ENOMEM;
+
+	list_add(&found->list, &info->space_info);
+	INIT_LIST_HEAD(&found->block_groups);
+	spin_lock_init(&found->lock);
+	found->flags = flags;
+	found->total_bytes = total_bytes;
+	found->bytes_used = bytes_used;
+	found->bytes_pinned = 0;
+	found->bytes_reserved = 0;
+	found->full = 0;
+	found->force_alloc = 0;
+	*space_info = found;
+	return 0;
+}
+
+static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
+				   BTRFS_BLOCK_GROUP_RAID1 |
+				   BTRFS_BLOCK_GROUP_RAID10 |
+				   BTRFS_BLOCK_GROUP_DUP);
+	if (extra_flags) {
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			fs_info->avail_data_alloc_bits |= extra_flags;
+		if (flags & BTRFS_BLOCK_GROUP_METADATA)
+			fs_info->avail_metadata_alloc_bits |= extra_flags;
+		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			fs_info->avail_system_alloc_bits |= extra_flags;
+	}
+}
+
+static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
+{
+	u64 num_devices = root->fs_info->fs_devices->num_devices;
+
+	if (num_devices == 1)
+		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
+	if (num_devices < 4)
+		flags &= ~BTRFS_BLOCK_GROUP_RAID10;
+
+	if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
+	    (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+		      BTRFS_BLOCK_GROUP_RAID10))) {
+		flags &= ~BTRFS_BLOCK_GROUP_DUP;
+	}
+
+	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
+	    (flags & BTRFS_BLOCK_GROUP_RAID10)) {
+		flags &= ~BTRFS_BLOCK_GROUP_RAID1;
+	}
+
+	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
+	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
+	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
+	     (flags & BTRFS_BLOCK_GROUP_DUP)))
+		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
+	return flags;
+}
+
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *extent_root, u64 alloc_bytes,
+			  u64 flags, int force)
+{
+	struct btrfs_space_info *space_info;
+	u64 thresh;
+	u64 start;
+	u64 num_bytes;
+	int ret = 0, waited = 0;
+
+	flags = reduce_alloc_profile(extent_root, flags);
+
+	space_info = __find_space_info(extent_root->fs_info, flags);
+	if (!space_info) {
+		ret = update_space_info(extent_root->fs_info, flags,
+					0, 0, &space_info);
+		BUG_ON(ret);
+	}
+	BUG_ON(!space_info);
+
+	if (space_info->force_alloc) {
+		force = 1;
+		space_info->force_alloc = 0;
+	}
+	if (space_info->full)
+		goto out;
+
+	thresh = div_factor(space_info->total_bytes, 6);
+	if (!force &&
+	   (space_info->bytes_used + space_info->bytes_pinned +
+	    space_info->bytes_reserved + alloc_bytes) < thresh)
+		goto out;
+
+	while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) {
+		if (!force)
+			goto out;
+		mutex_unlock(&extent_root->fs_info->alloc_mutex);
+		cond_resched();
+		mutex_lock(&extent_root->fs_info->alloc_mutex);
+		waited = 1;
+	}
+
+	if (waited && space_info->full)
+		goto out_unlock;
+
+	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
+	if (ret == -ENOSPC) {
+printk("space info full %Lu\n", flags);
+		space_info->full = 1;
+		goto out_unlock;
+	}
+	BUG_ON(ret);
+
+	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
+		     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
+	BUG_ON(ret);
+
+out_unlock:
+	mutex_unlock(&extent_root->fs_info->chunk_mutex);
+out:
+	return ret;
+}
+
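+/*
+ * walk the block groups covering [bytenr, bytenr + num_bytes) and
+ * update their used byte counts.  When freeing, mark_free controls
+ * whether the range is returned to the free space cache.
+ */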
+static int update_block_group(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      u64 bytenr, u64 num_bytes, int alloc,
+			      int mark_free)
+{
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 total = num_bytes;
+	u64 old_val;
+	u64 byte_in_group;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	while(total) {
+		cache = btrfs_lookup_block_group(info, bytenr);
+		if (!cache) {
+			return -1;
+		}
+		byte_in_group = bytenr - cache->key.objectid;
+		WARN_ON(byte_in_group > cache->key.offset);
+
+		spin_lock(&cache->lock);
+		cache->dirty = 1;
+		old_val = btrfs_block_group_used(&cache->item);
+		num_bytes = min(total, cache->key.offset - byte_in_group);
+		if (alloc) {
+			old_val += num_bytes;
+			cache->space_info->bytes_used += num_bytes;
+			btrfs_set_block_group_used(&cache->item, old_val);
+			spin_unlock(&cache->lock);
+		} else {
+			old_val -= num_bytes;
+			cache->space_info->bytes_used -= num_bytes;
+			btrfs_set_block_group_used(&cache->item, old_val);
+			spin_unlock(&cache->lock);
+			if (mark_free) {
+				int ret;
+				ret = btrfs_add_free_space(cache, bytenr,
+							   num_bytes);
+				if (ret)
+					return -1;
+			}
+		}
+		total -= num_bytes;
+		bytenr += num_bytes;
+	}
+	return 0;
+}
+
+static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
+{
+	struct btrfs_block_group_cache *cache;
+
+	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
+	if (!cache)
+		return 0;
+
+	return cache->key.objectid;
+}
+
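+/*
+ * pin or unpin the given byte range, keeping the per block group and
+ * global pinned counters in sync with the pinned_extents tree
+ */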
+int btrfs_update_pinned_extents(struct btrfs_root *root,
+				u64 bytenr, u64 num, int pin)
+{
+	u64 len;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	if (pin) {
+		set_extent_dirty(&fs_info->pinned_extents,
+				bytenr, bytenr + num - 1, GFP_NOFS);
+	} else {
+		clear_extent_dirty(&fs_info->pinned_extents,
+				bytenr, bytenr + num - 1, GFP_NOFS);
+	}
+	while (num > 0) {
+		cache = btrfs_lookup_block_group(fs_info, bytenr);
+		BUG_ON(!cache);
+		len = min(num, cache->key.offset -
+			  (bytenr - cache->key.objectid));
+		if (pin) {
+			spin_lock(&cache->lock);
+			cache->pinned += len;
+			cache->space_info->bytes_pinned += len;
+			spin_unlock(&cache->lock);
+			fs_info->total_pinned += len;
+		} else {
+			spin_lock(&cache->lock);
+			cache->pinned -= len;
+			cache->space_info->bytes_pinned -= len;
+			spin_unlock(&cache->lock);
+			fs_info->total_pinned -= len;
+		}
+		bytenr += len;
+		num -= len;
+	}
+	return 0;
+}
+
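+/*
+ * adjust the reserved byte counters of the block groups covering the
+ * given range
+ */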
+static int update_reserved_extents(struct btrfs_root *root,
+				   u64 bytenr, u64 num, int reserve)
+{
+	u64 len;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	while (num > 0) {
+		cache = btrfs_lookup_block_group(fs_info, bytenr);
+		BUG_ON(!cache);
+		len = min(num, cache->key.offset -
+			  (bytenr - cache->key.objectid));
+		if (reserve) {
+			spin_lock(&cache->lock);
+			cache->reserved += len;
+			cache->space_info->bytes_reserved += len;
+			spin_unlock(&cache->lock);
+		} else {
+			spin_lock(&cache->lock);
+			cache->reserved -= len;
+			cache->space_info->bytes_reserved -= len;
+			spin_unlock(&cache->lock);
+		}
+		bytenr += len;
+		num -= len;
+	}
+	return 0;
+}
+
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
+{
+	u64 last = 0;
+	u64 start;
+	u64 end;
+	struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
+	int ret;
+
+	while(1) {
+		ret = find_first_extent_bit(pinned_extents, last,
+					    &start, &end, EXTENT_DIRTY);
+		if (ret)
+			break;
+		set_extent_dirty(copy, start, end, GFP_NOFS);
+		last = end + 1;
+	}
+	return 0;
+}
+
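+/*
+ * unpin every range recorded in 'unpin' and put the space back into
+ * the free space cache of the block groups it came from
+ */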
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct extent_io_tree *unpin)
+{
+	u64 start;
+	u64 end;
+	int ret;
+	struct btrfs_block_group_cache *cache;
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+	while(1) {
+		ret = find_first_extent_bit(unpin, 0, &start, &end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
+		clear_extent_dirty(unpin, start, end, GFP_NOFS);
+		cache = btrfs_lookup_block_group(root->fs_info, start);
+		if (cache->cached)
+			btrfs_add_free_space(cache, start, end - start + 1);
+		if (need_resched()) {
+			mutex_unlock(&root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&root->fs_info->alloc_mutex);
+		}
+	}
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	return 0;
+}
+
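+/*
+ * process the delayed extent tree updates recorded in extent_ins:
+ * insert extent items and backrefs for PENDING_EXTENT_INSERT entries,
+ * and rewrite the backref key and generation for
+ * PENDING_BACKREF_UPDATE entries
+ */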
+static int finish_current_insert(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *extent_root)
+{
+	u64 start;
+	u64 end;
+	u64 priv;
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct btrfs_path *path;
+	struct btrfs_extent_ref *ref;
+	struct pending_extent_op *extent_op;
+	struct btrfs_key key;
+	struct btrfs_extent_item extent_item;
+	int ret;
+	int err = 0;
+
+	WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
+	btrfs_set_stack_extent_refs(&extent_item, 1);
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	while(1) {
+		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
+					    &end, EXTENT_LOCKED);
+		if (ret)
+			break;
+
+		ret = get_state_private(&info->extent_ins, start, &priv);
+		BUG_ON(ret);
+		extent_op = (struct pending_extent_op *)(unsigned long)priv;
+
+		if (extent_op->type == PENDING_EXTENT_INSERT) {
+			key.objectid = start;
+			key.offset = end + 1 - start;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			err = btrfs_insert_item(trans, extent_root, &key,
+					&extent_item, sizeof(extent_item));
+			BUG_ON(err);
+
+			clear_extent_bits(&info->extent_ins, start, end,
+					  EXTENT_LOCKED, GFP_NOFS);
+
+			err = insert_extent_backref(trans, extent_root, path,
+						start, extent_op->parent,
+						extent_root->root_key.objectid,
+						extent_op->generation,
+						extent_op->level);
+			BUG_ON(err);
+		} else if (extent_op->type == PENDING_BACKREF_UPDATE) {
+			err = lookup_extent_backref(trans, extent_root, path,
+						start, extent_op->orig_parent,
+						extent_root->root_key.objectid,
+						extent_op->orig_generation,
+						extent_op->level, 0);
+			BUG_ON(err);
+
+			clear_extent_bits(&info->extent_ins, start, end,
+					  EXTENT_LOCKED, GFP_NOFS);
+
+			key.objectid = start;
+			key.offset = extent_op->parent;
+			key.type = BTRFS_EXTENT_REF_KEY;
+			err = btrfs_set_item_key_safe(trans, extent_root, path,
+						      &key);
+			BUG_ON(err);
+			ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					     struct btrfs_extent_ref);
+			btrfs_set_ref_generation(path->nodes[0], ref,
+						 extent_op->generation);
+			btrfs_mark_buffer_dirty(path->nodes[0]);
+			btrfs_release_path(extent_root, path);
+		} else {
+			BUG_ON(1);
+		}
+		kfree(extent_op);
+
+		if (need_resched()) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
+	}
+	btrfs_free_path(path);
+	return 0;
+}
+
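+/*
+ * pin an extent that is being freed.  Returns 1 if the block was an
+ * unwritten tree block from the running transaction that could be
+ * cleaned and reused instead of pinned, 0 otherwise.
+ */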
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  u64 bytenr, u64 num_bytes, int is_data)
+{
+	int err = 0;
+	struct extent_buffer *buf;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	if (is_data)
+		goto pinit;
+
+	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
+	if (!buf)
+		goto pinit;
+
+	/* we can reuse a block if it hasn't been written
+	 * and it is from this transaction.  We can't
+	 * reuse anything from the tree log root because
+	 * it has tiny sub-transactions.
+	 */
+	if (btrfs_buffer_uptodate(buf, 0) &&
+	    btrfs_try_tree_lock(buf)) {
+		u64 header_owner = btrfs_header_owner(buf);
+		u64 header_transid = btrfs_header_generation(buf);
+		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
+		    header_owner != BTRFS_TREE_RELOC_OBJECTID &&
+		    header_transid == trans->transid &&
+		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+			clean_tree_block(NULL, root, buf);
+			btrfs_tree_unlock(buf);
+			free_extent_buffer(buf);
+			return 1;
+		}
+		btrfs_tree_unlock(buf);
+	}
+	free_extent_buffer(buf);
+pinit:
+	btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+
+	BUG_ON(err < 0);
+	return 0;
+}
+
+/*
+ * remove an extent from the root, returns 0 on success
+ */
+static int __free_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 bytenr, u64 num_bytes, u64 parent,
+			 u64 root_objectid, u64 ref_generation,
+			 u64 owner_objectid, int pin, int mark_free)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_root *extent_root = info->extent_root;
+	struct extent_buffer *leaf;
+	int ret;
+	int extent_slot = 0;
+	int found_extent = 0;
+	int num_to_del = 1;
+	struct btrfs_extent_item *ei;
+	u32 refs;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	key.objectid = bytenr;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	key.offset = num_bytes;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->reada = 1;
+	ret = lookup_extent_backref(trans, extent_root, path,
+				    bytenr, parent, root_objectid,
+				    ref_generation, owner_objectid, 1);
+	if (ret == 0) {
+		struct btrfs_key found_key;
+		extent_slot = path->slots[0];
+		while(extent_slot > 0) {
+			extent_slot--;
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+					      extent_slot);
+			if (found_key.objectid != bytenr)
+				break;
+			if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+			    found_key.offset == num_bytes) {
+				found_extent = 1;
+				break;
+			}
+			if (path->slots[0] - extent_slot > 5)
+				break;
+		}
+		if (!found_extent) {
+			ret = remove_extent_backref(trans, extent_root, path);
+			BUG_ON(ret);
+			btrfs_release_path(extent_root, path);
+			ret = btrfs_search_slot(trans, extent_root,
+						&key, path, -1, 1);
+			BUG_ON(ret);
+			extent_slot = path->slots[0];
+		}
+	} else {
+		btrfs_print_leaf(extent_root, path->nodes[0]);
+		WARN_ON(1);
+		printk("Unable to find ref byte nr %Lu root %Lu "
+		       "gen %Lu owner %Lu\n", bytenr,
+		       root_objectid, ref_generation, owner_objectid);
+	}
+
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, extent_slot,
+			    struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	BUG_ON(refs == 0);
+	refs -= 1;
+	btrfs_set_extent_refs(leaf, ei, refs);
+
+	btrfs_mark_buffer_dirty(leaf);
+
+	if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
+		struct btrfs_extent_ref *ref;
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_ref);
+		BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
+		/* if the back ref and the extent are next to each other
+		 * they get deleted below in one shot
+		 */
+		path->slots[0] = extent_slot;
+		num_to_del = 2;
+	} else if (found_extent) {
+		/* otherwise delete the extent back ref */
+		ret = remove_extent_backref(trans, extent_root, path);
+		BUG_ON(ret);
+		/* if refs are 0, we need to setup the path for deletion */
+		if (refs == 0) {
+			btrfs_release_path(extent_root, path);
+			ret = btrfs_search_slot(trans, extent_root, &key, path,
+						-1, 1);
+			BUG_ON(ret);
+		}
+	}
+
+	if (refs == 0) {
+		u64 super_used;
+		u64 root_used;
+#ifdef BIO_RW_DISCARD
+		u64 map_length = num_bytes;
+		struct btrfs_multi_bio *multi = NULL;
+#endif
+
+		if (pin) {
+			ret = pin_down_bytes(trans, root, bytenr, num_bytes,
+				owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
+			if (ret > 0)
+				mark_free = 1;
+			BUG_ON(ret < 0);
+		}
+
+		/* block accounting for super block */
+		spin_lock_irq(&info->delalloc_lock);
+		super_used = btrfs_super_bytes_used(&info->super_copy);
+		btrfs_set_super_bytes_used(&info->super_copy,
+					   super_used - num_bytes);
+		spin_unlock_irq(&info->delalloc_lock);
+
+		/* block accounting for root item */
+		root_used = btrfs_root_used(&root->root_item);
+		btrfs_set_root_used(&root->root_item,
+					   root_used - num_bytes);
+		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
+				      num_to_del);
+		BUG_ON(ret);
+		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
+					 mark_free);
+		BUG_ON(ret);
+
+#ifdef BIO_RW_DISCARD
+		/* Tell the block device(s) that the sectors can be discarded */
+		ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+				      bytenr, &map_length, &multi, 0);
+		if (!ret) {
+			struct btrfs_bio_stripe *stripe = multi->stripes;
+			int i;
+
+			if (map_length > num_bytes)
+				map_length = num_bytes;
+
+			for (i = 0; i < multi->num_stripes; i++, stripe++) {
+				blkdev_issue_discard(stripe->dev->bdev,
+						     stripe->physical >> 9,
+						     map_length >> 9);
+			}
+			kfree(multi);
+		}
+#endif
+	}
+	btrfs_free_path(path);
+	finish_current_insert(trans, extent_root);
+	return ret;
+}
+
+/*
+ * find all the blocks marked as pending in the radix tree and remove
+ * them from the extent map
+ */
+static int del_pending_extents(struct btrfs_trans_handle *trans, struct
+			       btrfs_root *extent_root)
+{
+	int ret;
+	int err = 0;
+	int mark_free = 0;
+	u64 start;
+	u64 end;
+	u64 priv;
+	struct extent_io_tree *pending_del;
+	struct extent_io_tree *extent_ins;
+	struct pending_extent_op *extent_op;
+
+	WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
+	extent_ins = &extent_root->fs_info->extent_ins;
+	pending_del = &extent_root->fs_info->pending_del;
+
+	while(1) {
+		ret = find_first_extent_bit(pending_del, 0, &start, &end,
+					    EXTENT_LOCKED);
+		if (ret)
+			break;
+
+		ret = get_state_private(pending_del, start, &priv);
+		BUG_ON(ret);
+		extent_op = (struct pending_extent_op *)(unsigned long)priv;
+
+		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
+				  GFP_NOFS);
+
+		ret = pin_down_bytes(trans, extent_root, start,
+				     end + 1 - start, 0);
+		mark_free = ret > 0;
+		if (!test_range_bit(extent_ins, start, end,
+				    EXTENT_LOCKED, 0)) {
+free_extent:
+			ret = __free_extent(trans, extent_root,
+					    start, end + 1 - start,
+					    extent_op->orig_parent,
+					    extent_root->root_key.objectid,
+					    extent_op->orig_generation,
+					    extent_op->level, 0, mark_free);
+			kfree(extent_op);
+		} else {
+			kfree(extent_op);
+			ret = get_state_private(extent_ins, start, &priv);
+			BUG_ON(ret);
+			extent_op = (struct pending_extent_op *)
+							(unsigned long)priv;
+
+			clear_extent_bits(extent_ins, start, end,
+					  EXTENT_LOCKED, GFP_NOFS);
+
+			if (extent_op->type == PENDING_BACKREF_UPDATE)
+				goto free_extent;
+
+			ret = update_block_group(trans, extent_root, start,
+						end + 1 - start, 0, mark_free);
+			BUG_ON(ret);
+			kfree(extent_op);
+		}
+		if (ret)
+			err = ret;
+
+		if (need_resched()) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
+	}
+	return err;
+}
+
+/*
+ * remove an extent from the root, returns 0 on success
+ */
+static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 root_objectid, u64 ref_generation,
+			       u64 owner_objectid, int pin)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	int pending_ret;
+	int ret;
+
+	WARN_ON(num_bytes < root->sectorsize);
+	if (root == extent_root) {
+		struct pending_extent_op *extent_op;
+
+		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+		BUG_ON(!extent_op);
+
+		extent_op->type = PENDING_EXTENT_DELETE;
+		extent_op->bytenr = bytenr;
+		extent_op->num_bytes = num_bytes;
+		extent_op->parent = parent;
+		extent_op->orig_parent = parent;
+		extent_op->generation = ref_generation;
+		extent_op->orig_generation = ref_generation;
+		extent_op->level = (int)owner_objectid;
+
+		set_extent_bits(&root->fs_info->pending_del,
+				bytenr, bytenr + num_bytes - 1,
+				EXTENT_LOCKED, GFP_NOFS);
+		set_state_private(&root->fs_info->pending_del,
+				  bytenr, (unsigned long)extent_op);
+		return 0;
+	}
+	/* if metadata, always pin */
+	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+			struct btrfs_block_group_cache *cache;
+
+			/* btrfs_free_reserved_extent */
+			cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+			BUG_ON(!cache);
+			btrfs_add_free_space(cache, bytenr, num_bytes);
+			update_reserved_extents(root, bytenr, num_bytes, 0);
+			return 0;
+		}
+		pin = 1;
+	}
+
+	/* if data, pin when any transaction has committed this */
+	if (ref_generation != trans->transid)
+		pin = 1;
+
+	ret = __free_extent(trans, root, bytenr, num_bytes, parent,
+			    root_objectid, ref_generation,
+			    owner_objectid, pin, pin == 0);
+
+	finish_current_insert(trans, root->fs_info->extent_root);
+	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
+	return ret ? ret : pending_ret;
+}
+
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root,
+		      u64 bytenr, u64 num_bytes, u64 parent,
+		      u64 root_objectid, u64 ref_generation,
+		      u64 owner_objectid, int pin)
+{
+	int ret;
+
+	maybe_lock_mutex(root);
+	ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
+				  root_objectid, ref_generation,
+				  owner_objectid, pin);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+static u64 stripe_align(struct btrfs_root *root, u64 val)
+{
+	u64 mask = ((u64)root->stripesize - 1);
+	u64 ret = (val + mask) & ~mask;
+	return ret;
+}
+
+/*
+ * walks the btree of allocated extents and finds a hole of a given size.
+ * The key ins is changed to record the hole:
+ * ins->objectid == block start
+ * ins->flags = BTRFS_EXTENT_ITEM_KEY
+ * ins->offset == number of blocks
+ * Any available blocks before search_start are skipped.
+ */
+static int noinline find_free_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *orig_root,
+				     u64 num_bytes, u64 empty_size,
+				     u64 search_start, u64 search_end,
+				     u64 hint_byte, struct btrfs_key *ins,
+				     u64 exclude_start, u64 exclude_nr,
+				     int data)
+{
+	int ret;
+	u64 orig_search_start;
+	struct btrfs_root * root = orig_root->fs_info->extent_root;
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 total_needed = num_bytes;
+	u64 *last_ptr = NULL;
+	struct btrfs_block_group_cache *block_group;
+	int chunk_alloc_done = 0;
+	int empty_cluster = 2 * 1024 * 1024;
+	int allowed_chunk_alloc = 0;
+
+	WARN_ON(num_bytes < root->sectorsize);
+	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
+
+	if (orig_root->ref_cows || empty_size)
+		allowed_chunk_alloc = 1;
+
+	if (data & BTRFS_BLOCK_GROUP_METADATA) {
+		last_ptr = &root->fs_info->last_alloc;
+		empty_cluster = 256 * 1024;
+	}
+
+	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
+		last_ptr = &root->fs_info->last_data_alloc;
+
+	if (last_ptr) {
+		if (*last_ptr)
+			hint_byte = *last_ptr;
+		else
+			empty_size += empty_cluster;
+	}
+
+	search_start = max(search_start, first_logical_byte(root, 0));
+	orig_search_start = search_start;
+
+	search_start = max(search_start, hint_byte);
+	total_needed += empty_size;
+
+new_group:
+	block_group = btrfs_lookup_block_group(info, search_start);
+	if (!block_group)
+		block_group = btrfs_lookup_first_block_group(info,
+							     search_start);
+
+	/*
+	 * Ok this looks a little tricky, but it's really simple.  First if we
+	 * didn't find a block group obviously we want to start over.
+	 * Secondly, if the block group we found does not match the type we
+	 * need, and we have a last_ptr and it's not 0, chances are the last
+	 * allocation we made was at the end of the block group, so let's go
+	 * ahead and skip the looking through the rest of the block groups and
+	 * start at the beginning.  This helps with metadata allocations,
+	 * since you are likely to have a bunch of data block groups to search
+	 * through first before you realize that you need to start over, so go
+	 * ahead and start over and save the time.
+	 */
+	if (!block_group || (!block_group_bits(block_group, data) &&
+			     last_ptr && *last_ptr)) {
+		if (search_start != orig_search_start) {
+			if (last_ptr && *last_ptr) {
+				total_needed += empty_cluster;
+				*last_ptr = 0;
+			}
+			search_start = orig_search_start;
+			goto new_group;
+		} else if (!chunk_alloc_done && allowed_chunk_alloc) {
+			ret = do_chunk_alloc(trans, root,
+					     num_bytes + 2 * 1024 * 1024,
+					     data, 1);
+			if (ret < 0)
+				goto error;
+			BUG_ON(ret);
+			chunk_alloc_done = 1;
+			search_start = orig_search_start;
+			goto new_group;
+		} else {
+			ret = -ENOSPC;
+			goto error;
+		}
+	}
+
+	/*
+	 * this is going to search through all of the existing block groups it
+	 * can find, so if we don't find something we need to see if we can
+	 * allocate what we need.
+	 */
+	ret = find_free_space(root, &block_group, &search_start,
+			      total_needed, data);
+	if (ret == -ENOSPC) {
+		/*
+		 * instead of allocating, start at the original search start
+		 * and see if there is something to be found; if not then we
+		 * allocate.
+		 */
+		if (search_start != orig_search_start) {
+			if (last_ptr && *last_ptr) {
+				*last_ptr = 0;
+				total_needed += empty_cluster;
+			}
+			search_start = orig_search_start;
+			goto new_group;
+		}
+
+		/*
+		 * we've already allocated, we're pretty screwed
+		 */
+		if (chunk_alloc_done) {
+			goto error;
+		} else if (!allowed_chunk_alloc && block_group &&
+			   block_group_bits(block_group, data)) {
+			block_group->space_info->force_alloc = 1;
+			goto error;
+		} else if (!allowed_chunk_alloc) {
+			goto error;
+		}
+
+		ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
+				     data, 1);
+		if (ret < 0)
+			goto error;
+
+		BUG_ON(ret);
+		chunk_alloc_done = 1;
+		if (block_group)
+			search_start = block_group->key.objectid +
+				block_group->key.offset;
+		else
+			search_start = orig_search_start;
+		goto new_group;
+	}
+
+	if (ret)
+		goto error;
+
+	search_start = stripe_align(root, search_start);
+	ins->objectid = search_start;
+	ins->offset = num_bytes;
+
+	if (ins->objectid + num_bytes >= search_end) {
+		search_start = orig_search_start;
+		if (chunk_alloc_done) {
+			ret = -ENOSPC;
+			goto error;
+		}
+		goto new_group;
+	}
+
+	if (ins->objectid + num_bytes >
+	    block_group->key.objectid + block_group->key.offset) {
+		if (search_start == orig_search_start && chunk_alloc_done) {
+			ret = -ENOSPC;
+			goto error;
+		}
+		search_start = block_group->key.objectid +
+			block_group->key.offset;
+		goto new_group;
+	}
+
+	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
+	    ins->objectid < exclude_start + exclude_nr)) {
+		search_start = exclude_start + exclude_nr;
+		goto new_group;
+	}
+
+	if (!(data & BTRFS_BLOCK_GROUP_DATA))
+		trans->block_group = block_group;
+
+	ins->offset = num_bytes;
+	if (last_ptr) {
+		*last_ptr = ins->objectid + ins->offset;
+		if (*last_ptr ==
+		    btrfs_super_total_bytes(&root->fs_info->super_copy))
+			*last_ptr = 0;
+	}
+
+	ret = 0;
+error:
+	return ret;
+}
+
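+/*
+ * print the usage of a space_info and each of its block groups, used
+ * when an allocation unexpectedly fails
+ */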
+static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
+{
+	struct btrfs_block_group_cache *cache;
+	struct list_head *l;
+
+	printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
+	       info->total_bytes - info->bytes_used - info->bytes_pinned -
+	       info->bytes_reserved, (info->full) ? "" : "not ");
+
+	spin_lock(&info->lock);
+	list_for_each(l, &info->block_groups) {
+		cache = list_entry(l, struct btrfs_block_group_cache, list);
+		spin_lock(&cache->lock);
+		printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
+		       "%Lu pinned %Lu reserved\n",
+		       cache->key.objectid, cache->key.offset,
+		       btrfs_block_group_used(&cache->item),
+		       cache->pinned, cache->reserved);
+		btrfs_dump_free_space(cache, bytes);
+		spin_unlock(&cache->lock);
+	}
+	spin_unlock(&info->lock);
+}
+
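+/*
+ * pick an allocation profile for the request, pre-allocate chunks if
+ * needed and find a free extent.  On -ENOSPC the request is retried
+ * with half the size, down to min_alloc_size.
+ */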
+static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  u64 num_bytes, u64 min_alloc_size,
+				  u64 empty_size, u64 hint_byte,
+				  u64 search_end, struct btrfs_key *ins,
+				  u64 data)
+{
+	int ret;
+	u64 search_start = 0;
+	u64 alloc_profile;
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_block_group_cache *cache;
+
+	if (data) {
+		alloc_profile = info->avail_data_alloc_bits &
+			        info->data_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
+	} else if (root == root->fs_info->chunk_root) {
+		alloc_profile = info->avail_system_alloc_bits &
+			        info->system_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
+	} else {
+		alloc_profile = info->avail_metadata_alloc_bits &
+			        info->metadata_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+	}
+again:
+	data = reduce_alloc_profile(root, data);
+	/*
+	 * the only place that sets empty_size is btrfs_realloc_node, which
+	 * is not called recursively on allocations
+	 */
+	if (empty_size || root->ref_cows) {
+		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
+			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+				     2 * 1024 * 1024,
+				     BTRFS_BLOCK_GROUP_METADATA |
+				     (info->metadata_alloc_profile &
+				      info->avail_metadata_alloc_bits), 0);
+		}
+		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+				     num_bytes + 2 * 1024 * 1024, data, 0);
+	}
+
+	WARN_ON(num_bytes < root->sectorsize);
+	ret = find_free_extent(trans, root, num_bytes, empty_size,
+			       search_start, search_end, hint_byte, ins,
+			       trans->alloc_exclude_start,
+			       trans->alloc_exclude_nr, data);
+
+	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
+		num_bytes = num_bytes >> 1;
+		num_bytes = num_bytes & ~(root->sectorsize - 1);
+		num_bytes = max(num_bytes, min_alloc_size);
+		do_chunk_alloc(trans, root->fs_info->extent_root,
+			       num_bytes, data, 1);
+		goto again;
+	}
+	if (ret) {
+		struct btrfs_space_info *sinfo;
+
+		sinfo = __find_space_info(root->fs_info, data);
+		printk("allocation failed flags %Lu, wanted %Lu\n",
+		       data, num_bytes);
+		dump_space_info(sinfo, num_bytes);
+		BUG();
+	}
+	cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
+	if (!cache) {
+		printk(KERN_ERR "Unable to find block group for %Lu\n",
+		       ins->objectid);
+		return -ENOSPC;
+	}
+
+	ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
+
+	return ret;
+}
+
+int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
+{
+	struct btrfs_block_group_cache *cache;
+
+	maybe_lock_mutex(root);
+	cache = btrfs_lookup_block_group(root->fs_info, start);
+	if (!cache) {
+		printk(KERN_ERR "Unable to find block group for %Lu\n", start);
+		maybe_unlock_mutex(root);
+		return -ENOSPC;
+	}
+	btrfs_add_free_space(cache, start, len);
+	update_reserved_extents(root, start, len, 0);
+	maybe_unlock_mutex(root);
+	return 0;
+}
+
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  u64 num_bytes, u64 min_alloc_size,
+				  u64 empty_size, u64 hint_byte,
+				  u64 search_end, struct btrfs_key *ins,
+				  u64 data)
+{
+	int ret;
+	maybe_lock_mutex(root);
+	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+				     empty_size, hint_byte, search_end, ins,
+				     data);
+	update_reserved_extents(root, ins->objectid, ins->offset, 1);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
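+/*
+ * insert the extent item and initial backref for a reserved extent,
+ * and update the super block, root item and block group accounting.
+ * Allocations made for the extent tree itself are deferred through
+ * the extent_ins tree.
+ */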
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root, u64 parent,
+					 u64 root_objectid, u64 ref_generation,
+					 u64 owner, struct btrfs_key *ins)
+{
+	int ret;
+	int pending_ret;
+	u64 super_used;
+	u64 root_used;
+	u64 num_bytes = ins->offset;
+	u32 sizes[2];
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_root *extent_root = info->extent_root;
+	struct btrfs_extent_item *extent_item;
+	struct btrfs_extent_ref *ref;
+	struct btrfs_path *path;
+	struct btrfs_key keys[2];
+
+	if (parent == 0)
+		parent = ins->objectid;
+
+	/* block accounting for super block */
+	spin_lock_irq(&info->delalloc_lock);
+	super_used = btrfs_super_bytes_used(&info->super_copy);
+	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
+	spin_unlock_irq(&info->delalloc_lock);
+
+	/* block accounting for root item */
+	root_used = btrfs_root_used(&root->root_item);
+	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
+
+	if (root == extent_root) {
+		struct pending_extent_op *extent_op;
+
+		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+		BUG_ON(!extent_op);
+
+		extent_op->type = PENDING_EXTENT_INSERT;
+		extent_op->bytenr = ins->objectid;
+		extent_op->num_bytes = ins->offset;
+		extent_op->parent = parent;
+		extent_op->orig_parent = 0;
+		extent_op->generation = ref_generation;
+		extent_op->orig_generation = 0;
+		extent_op->level = (int)owner;
+
+		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
+				ins->objectid + ins->offset - 1,
+				EXTENT_LOCKED, GFP_NOFS);
+		set_state_private(&root->fs_info->extent_ins,
+				  ins->objectid, (unsigned long)extent_op);
+		goto update_block;
+	}
+
+	memcpy(&keys[0], ins, sizeof(*ins));
+	keys[1].objectid = ins->objectid;
+	keys[1].type = BTRFS_EXTENT_REF_KEY;
+	keys[1].offset = parent;
+	sizes[0] = sizeof(*extent_item);
+	sizes[1] = sizeof(*ref);
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
+				       sizes, 2);
+	BUG_ON(ret);
+
+	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				     struct btrfs_extent_item);
+	btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
+	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
+			     struct btrfs_extent_ref);
+
+	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
+	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
+	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
+	btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
+
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+	trans->alloc_exclude_start = 0;
+	trans->alloc_exclude_nr = 0;
+	btrfs_free_path(path);
+	finish_current_insert(trans, extent_root);
+	pending_ret = del_pending_extents(trans, extent_root);
+
+	if (ret)
+		goto out;
+	if (pending_ret) {
+		ret = pending_ret;
+		goto out;
+	}
+
+update_block:
+	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
+	if (ret) {
+		printk("update block group failed for %Lu %Lu\n",
+		       ins->objectid, ins->offset);
+		BUG();
+	}
+out:
+	return ret;
+}
+
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, u64 parent,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, struct btrfs_key *ins)
+{
+	int ret;
+
+	if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
+		return 0;
+	maybe_lock_mutex(root);
+	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
+					    ref_generation, owner, ins);
+	update_reserved_extents(root, ins->objectid, ins->offset, 0);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+/*
+ * this is used by the tree logging recovery code.  It records that
+ * an extent has been allocated and makes sure to clear the free
+ * space cache bits as well
+ */
+int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, u64 parent,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, struct btrfs_key *ins)
+{
+	int ret;
+	struct btrfs_block_group_cache *block_group;
+
+	maybe_lock_mutex(root);
+	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
+	cache_block_group(root, block_group);
+
+	ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
+	BUG_ON(ret);
+	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
+					    ref_generation, owner, ins);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+/*
+ * finds a free extent and does all the dirty work required for allocation.
+ * returns the key for the allocated extent through ins.
+ *
+ * returns 0 if everything worked, non-zero otherwise.
+ */
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root,
+		       u64 num_bytes, u64 parent, u64 min_alloc_size,
+		       u64 root_objectid, u64 ref_generation,
+		       u64 owner_objectid, u64 empty_size, u64 hint_byte,
+		       u64 search_end, struct btrfs_key *ins, u64 data)
+{
+	int ret;
+
+	maybe_lock_mutex(root);
+
+	ret = __btrfs_reserve_extent(trans, root, num_bytes,
+				     min_alloc_size, empty_size, hint_byte,
+				     search_end, ins, data);
+	BUG_ON(ret);
+	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
+		ret = __btrfs_alloc_reserved_extent(trans, root, parent,
+					root_objectid, ref_generation,
+					owner_objectid, ins);
+		BUG_ON(ret);
+
+	} else {
+		update_reserved_extents(root, ins->objectid, ins->offset, 1);
+	}
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+					    struct btrfs_root *root,
+					    u64 bytenr, u32 blocksize)
+{
+	struct extent_buffer *buf;
+
+	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+	btrfs_set_header_generation(buf, trans->transid);
+	btrfs_tree_lock(buf);
+	clean_tree_block(trans, root, buf);
+	btrfs_set_buffer_uptodate(buf);
+	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+		set_extent_dirty(&root->dirty_log_pages, buf->start,
+			 buf->start + buf->len - 1, GFP_NOFS);
+	} else {
+		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
+			 buf->start + buf->len - 1, GFP_NOFS);
+	}
+	trans->blocks_used++;
+	return buf;
+}
+
+/*
+ * helper function to allocate a block for a given tree.
+ * returns the tree buffer or an ERR_PTR on failure.
+ */
+struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     u32 blocksize, u64 parent,
+					     u64 root_objectid,
+					     u64 ref_generation,
+					     int level,
+					     u64 hint,
+					     u64 empty_size)
+{
+	struct btrfs_key ins;
+	int ret;
+	struct extent_buffer *buf;
+
+	ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
+				 root_objectid, ref_generation, level,
+				 empty_size, hint, (u64)-1, &ins, 0);
+	if (ret) {
+		BUG_ON(ret > 0);
+		return ERR_PTR(ret);
+	}
+
+	buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
+	return buf;
+}
+
+int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, struct extent_buffer *leaf)
+{
+	u64 leaf_owner;
+	u64 leaf_generation;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	int i;
+	int nritems;
+	int ret;
+
+	BUG_ON(!btrfs_is_leaf(leaf));
+	nritems = btrfs_header_nritems(leaf);
+	leaf_owner = btrfs_header_owner(leaf);
+	leaf_generation = btrfs_header_generation(leaf);
+
+	for (i = 0; i < nritems; i++) {
+		u64 disk_bytenr;
+		cond_resched();
+
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		/*
+		 * FIXME make sure to insert a trans record that
+		 * repeats the snapshot del on crash
+		 */
+		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		if (disk_bytenr == 0)
+			continue;
+
+		mutex_lock(&root->fs_info->alloc_mutex);
+		ret = __btrfs_free_extent(trans, root, disk_bytenr,
+				btrfs_file_extent_disk_num_bytes(leaf, fi),
+				leaf->start, leaf_owner, leaf_generation,
+				key.objectid, 0);
+		mutex_unlock(&root->fs_info->alloc_mutex);
+		BUG_ON(ret);
+
+		atomic_inc(&root->fs_info->throttle_gen);
+		wake_up(&root->fs_info->transaction_throttle);
+		cond_resched();
+	}
+	return 0;
+}
+
+static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_leaf_ref *ref)
+{
+	int i;
+	int ret;
+	struct btrfs_extent_info *info = ref->extents;
+
+	for (i = 0; i < ref->nritems; i++) {
+		mutex_lock(&root->fs_info->alloc_mutex);
+		ret = __btrfs_free_extent(trans, root, info->bytenr,
+					  info->num_bytes, ref->bytenr,
+					  ref->owner, ref->generation,
+					  info->objectid, 0);
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
+		atomic_inc(&root->fs_info->throttle_gen);
+		wake_up(&root->fs_info->transaction_throttle);
+		cond_resched();
+
+		BUG_ON(ret);
+		info++;
+	}
+
+	return 0;
+}
+
+int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
+			      u32 *refs)
+{
+	int ret;
+
+	ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
+	BUG_ON(ret);
+
+#if 0 /* some debugging code in case we see problems here */
+	/* if the refs count is one, it won't get increased again.  But
+	 * if the ref count is > 1, someone may be decreasing it at
+	 * the same time we are.
+	 */
+	if (*refs != 1) {
+		struct extent_buffer *eb = NULL;
+		eb = btrfs_find_create_tree_block(root, start, len);
+		if (eb)
+			btrfs_tree_lock(eb);
+
+		mutex_lock(&root->fs_info->alloc_mutex);
+		ret = lookup_extent_ref(NULL, root, start, len, refs);
+		BUG_ON(ret);
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
+		if (eb) {
+			btrfs_tree_unlock(eb);
+			free_extent_buffer(eb);
+		}
+		if (*refs == 1) {
+			printk("block %llu went down to one during drop_snap\n",
+			       (unsigned long long)start);
+		}
+
+	}
+#endif
+
+	cond_resched();
+	return ret;
+}
+
+/*
+ * helper function for drop_snapshot, this walks down the tree dropping ref
+ * counts as it goes.
+ */
+static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path, int *level)
+{
+	u64 root_owner;
+	u64 root_gen;
+	u64 bytenr;
+	u64 ptr_gen;
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	struct extent_buffer *parent;
+	struct btrfs_leaf_ref *ref;
+	u32 blocksize;
+	int ret;
+	u32 refs;
+
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+	ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
+				path->nodes[*level]->len, &refs);
+	BUG_ON(ret);
+	if (refs > 1)
+		goto out;
+
+	/*
+	 * walk down to the last node level and free all the leaves
+	 */
+	while(*level >= 0) {
+		WARN_ON(*level < 0);
+		WARN_ON(*level >= BTRFS_MAX_LEVEL);
+		cur = path->nodes[*level];
+
+		if (btrfs_header_level(cur) != *level)
+			WARN_ON(1);
+
+		if (path->slots[*level] >=
+		    btrfs_header_nritems(cur))
+			break;
+		if (*level == 0) {
+			ret = btrfs_drop_leaf_ref(trans, root, cur);
+			BUG_ON(ret);
+			break;
+		}
+		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+		blocksize = btrfs_level_size(root, *level - 1);
+
+		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
+		BUG_ON(ret);
+		if (refs != 1) {
+			parent = path->nodes[*level];
+			root_owner = btrfs_header_owner(parent);
+			root_gen = btrfs_header_generation(parent);
+			path->slots[*level]++;
+
+			mutex_lock(&root->fs_info->alloc_mutex);
+			ret = __btrfs_free_extent(trans, root, bytenr,
+						blocksize, parent->start,
+						root_owner, root_gen,
+						*level - 1, 1);
+			BUG_ON(ret);
+			mutex_unlock(&root->fs_info->alloc_mutex);
+
+			atomic_inc(&root->fs_info->throttle_gen);
+			wake_up(&root->fs_info->transaction_throttle);
+			cond_resched();
+
+			continue;
+		}
+		/*
+		 * at this point, we have a single ref, and since the
+		 * only place referencing this extent is a dead root,
+		 * the reference count should never go higher.
+		 * So, we don't need to check it again.
+		 */
+		if (*level == 1) {
+			ref = btrfs_lookup_leaf_ref(root, bytenr);
+			if (ref && ref->generation != ptr_gen) {
+				btrfs_free_leaf_ref(root, ref);
+				ref = NULL;
+			}
+			if (ref) {
+				ret = cache_drop_leaf_ref(trans, root, ref);
+				BUG_ON(ret);
+				btrfs_remove_leaf_ref(root, ref);
+				btrfs_free_leaf_ref(root, ref);
+				*level = 0;
+				break;
+			}
+			if (printk_ratelimit()) {
+				printk("leaf ref miss for bytenr %llu\n",
+				       (unsigned long long)bytenr);
+			}
+		}
+		next = btrfs_find_tree_block(root, bytenr, blocksize);
+		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
+			free_extent_buffer(next);
+
+			next = read_tree_block(root, bytenr, blocksize,
+					       ptr_gen);
+			cond_resched();
+#if 0
+			/*
+			 * this is a debugging check and can go away;
+			 * the ref should never go all the way down to 1
+			 * at this point.
+			 */
+			ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
+						&refs);
+			BUG_ON(ret);
+			WARN_ON(refs != 1);
+#endif
+		}
+		WARN_ON(*level <= 0);
+		if (path->nodes[*level-1])
+			free_extent_buffer(path->nodes[*level-1]);
+		path->nodes[*level-1] = next;
+		*level = btrfs_header_level(next);
+		path->slots[*level] = 0;
+		cond_resched();
+	}
+out:
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+	if (path->nodes[*level] == root->node) {
+		parent = path->nodes[*level];
+		bytenr = path->nodes[*level]->start;
+	} else {
+		parent = path->nodes[*level + 1];
+		bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
+	}
+
+	blocksize = btrfs_level_size(root, *level);
+	root_owner = btrfs_header_owner(parent);
+	root_gen = btrfs_header_generation(parent);
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+	ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
+				  parent->start, root_owner, root_gen,
+				  *level, 1);
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	free_extent_buffer(path->nodes[*level]);
+	path->nodes[*level] = NULL;
+	*level += 1;
+	BUG_ON(ret);
+
+	cond_resched();
+	return 0;
+}
+
+/*
+ * helper for dropping snapshots.  This walks back up the tree in the path
+ * to find the first node higher up where we haven't yet gone through
+ * all the slots
+ */
+static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path, int *level)
+{
+	u64 root_owner;
+	u64 root_gen;
+	struct btrfs_root_item *root_item = &root->root_item;
+	int i;
+	int slot;
+	int ret;
+
+	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+		slot = path->slots[i];
+		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
+			struct extent_buffer *node;
+			struct btrfs_disk_key disk_key;
+			node = path->nodes[i];
+			path->slots[i]++;
+			*level = i;
+			WARN_ON(*level == 0);
+			btrfs_node_key(node, &disk_key, path->slots[i]);
+			memcpy(&root_item->drop_progress,
+			       &disk_key, sizeof(disk_key));
+			root_item->drop_level = i;
+			return 0;
+		} else {
+			struct extent_buffer *parent;
+			if (path->nodes[*level] == root->node)
+				parent = path->nodes[*level];
+			else
+				parent = path->nodes[*level + 1];
+
+			root_owner = btrfs_header_owner(parent);
+			root_gen = btrfs_header_generation(parent);
+			ret = btrfs_free_extent(trans, root,
+						path->nodes[*level]->start,
+						path->nodes[*level]->len,
+						parent->start, root_owner,
+						root_gen, *level, 1);
+			BUG_ON(ret);
+			free_extent_buffer(path->nodes[*level]);
+			path->nodes[*level] = NULL;
+			*level = i + 1;
+		}
+	}
+	return 1;
+}
+
+/*
+ * drop the reference count on the tree rooted at 'snap'.  This traverses
+ * the tree freeing any blocks that have a ref count of zero after being
+ * decremented.
+ */
+int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
+			*root)
+{
+	int ret = 0;
+	int wret;
+	int level;
+	struct btrfs_path *path;
+	int i;
+	int orig_level;
+	struct btrfs_root_item *root_item = &root->root_item;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	level = btrfs_header_level(root->node);
+	orig_level = level;
+	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		path->nodes[level] = root->node;
+		extent_buffer_get(root->node);
+		path->slots[level] = 0;
+	} else {
+		struct btrfs_key key;
+		struct btrfs_disk_key found_key;
+		struct extent_buffer *node;
+
+		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+		level = root_item->drop_level;
+		path->lowest_level = level;
+		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (wret < 0) {
+			ret = wret;
+			goto out;
+		}
+		node = path->nodes[level];
+		btrfs_node_key(node, &found_key, path->slots[level]);
+		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
+			       sizeof(found_key)));
+		/*
+		 * unlock our path, this is safe because only this
+		 * function is allowed to delete this snapshot
+		 */
+		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+			if (path->nodes[i] && path->locks[i]) {
+				path->locks[i] = 0;
+				btrfs_tree_unlock(path->nodes[i]);
+			}
+		}
+	}
+	while(1) {
+		wret = walk_down_tree(trans, root, path, &level);
+		if (wret > 0)
+			break;
+		if (wret < 0)
+			ret = wret;
+
+		wret = walk_up_tree(trans, root, path, &level);
+		if (wret > 0)
+			break;
+		if (wret < 0)
+			ret = wret;
+		if (trans->transaction->in_commit) {
+			ret = -EAGAIN;
+			break;
+		}
+		atomic_inc(&root->fs_info->throttle_gen);
+		wake_up(&root->fs_info->transaction_throttle);
+	}
+	for (i = 0; i <= orig_level; i++) {
+		if (path->nodes[i]) {
+			free_extent_buffer(path->nodes[i]);
+			path->nodes[i] = NULL;
+		}
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static unsigned long calc_ra(unsigned long start, unsigned long last,
+			     unsigned long nr)
+{
+	return min(last, start + nr - 1);
+}
+
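+/*
+ * read in and dirty the pages covering [start, start + len) of the
+ * relocation inode so the data gets written out again
+ */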
+static int noinline relocate_inode_pages(struct inode *inode, u64 start,
+					 u64 len)
+{
+	u64 page_start;
+	u64 page_end;
+	unsigned long first_index;
+	unsigned long last_index;
+	unsigned long i;
+	struct page *page;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct file_ra_state *ra;
+	struct btrfs_ordered_extent *ordered;
+	unsigned int total_read = 0;
+	unsigned int total_dirty = 0;
+	int ret = 0;
+
+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
+	if (!ra)
+		return -ENOMEM;
+
+	mutex_lock(&inode->i_mutex);
+	first_index = start >> PAGE_CACHE_SHIFT;
+	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
+
+	/* make sure the dirty trick played by the caller works */
+	ret = invalidate_inode_pages2_range(inode->i_mapping,
+					    first_index, last_index);
+	if (ret)
+		goto out_unlock;
+
+	file_ra_state_init(ra, inode->i_mapping);
+
+	for (i = first_index ; i <= last_index; i++) {
+		if (total_read % ra->ra_pages == 0) {
+			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
+				       calc_ra(i, last_index, ra->ra_pages));
+		}
+		total_read++;
+again:
+		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
+			BUG_ON(1);
+		page = grab_cache_page(inode->i_mapping, i);
+		if (!page) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				ret = -EIO;
+				goto out_unlock;
+			}
+		}
+		wait_on_page_writeback(page);
+
+		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+
+		ordered = btrfs_lookup_ordered_extent(inode, page_start);
+		if (ordered) {
+			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			goto again;
+		}
+		set_page_extent_mapped(page);
+
+		btrfs_set_extent_delalloc(inode, page_start, page_end);
+		if (i == first_index)
+			set_extent_bits(io_tree, page_start, page_end,
+					EXTENT_BOUNDARY, GFP_NOFS);
+
+		set_page_dirty(page);
+		total_dirty++;
+
+		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
+out_unlock:
+	kfree(ra);
+	mutex_unlock(&inode->i_mutex);
+	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
+	return ret;
+}
+
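+/*
+ * insert a pinned extent mapping for the extent being relocated and
+ * dirty the matching pages of the relocation inode
+ */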
+static int noinline relocate_data_extent(struct inode *reloc_inode,
+					 struct btrfs_key *extent_key,
+					 u64 offset)
+{
+	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
+	struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
+	struct extent_map *em;
+
+	em = alloc_extent_map(GFP_NOFS);
+	BUG_ON(!em || IS_ERR(em));
+
+	em->start = extent_key->objectid - offset;
+	em->len = extent_key->offset;
+	em->block_start = extent_key->objectid;
+	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+	/* setup extent map to cheat btrfs_readpage */
+	mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex);
+	while (1) {
+		int ret;
+		spin_lock(&em_tree->lock);
+		ret = add_extent_mapping(em_tree, em);
+		spin_unlock(&em_tree->lock);
+		if (ret != -EEXIST) {
+			free_extent_map(em);
+			break;
+		}
+		btrfs_drop_extent_cache(reloc_inode, em->start,
+					em->start + em->len - 1, 0);
+	}
+	mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex);
+
+	return relocate_inode_pages(reloc_inode, extent_key->objectid - offset,
+				    extent_key->offset);
+}
+
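+/*
+ * state for walking the back references of an extent up toward the
+ * tree roots that own it.  nodes[] records the bytenr of the
+ * referencing block at each level of the path.
+ */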
+struct btrfs_ref_path {
+	u64 extent_start;
+	u64 nodes[BTRFS_MAX_LEVEL];
+	u64 root_objectid;
+	u64 root_generation;
+	u64 owner_objectid;
+	u32 num_refs;
+	int lowest_level;
+	int current_level;
+};
+
+struct disk_extent {
+	u64 disk_bytenr;
+	u64 disk_num_bytes;
+	u64 offset;
+	u64 num_bytes;
+};
+
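+/*
+ * return 1 for the trees that are only ever cow'ed and never
+ * reference counted across snapshots: the root, extent, chunk, dev
+ * and log trees
+ */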
+static int is_cowonly_root(u64 root_objectid)
+{
+	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
+	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
+	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
+	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
+	    root_objectid == BTRFS_TREE_LOG_OBJECTID)
+		return 1;
+	return 0;
+}
+
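+/*
+ * advance ref_path to the next back reference chain for the extent:
+ * walk up the backrefs until a tree root (or a ref created by the
+ * running transaction) is found, walking back down when a branch has
+ * been exhausted.  Returns 0 when a chain is found, 1 when the search
+ * is done and a negative errno on error.
+ */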
+static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *extent_root,
+				    struct btrfs_ref_path *ref_path,
+				    int first_time)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_extent_ref *ref;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	u64 bytenr;
+	u32 nritems;
+	int level;
+	int ret = 1;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	mutex_lock(&extent_root->fs_info->alloc_mutex);
+
+	if (first_time) {
+		ref_path->lowest_level = -1;
+		ref_path->current_level = -1;
+		goto walk_up;
+	}
+walk_down:
+	level = ref_path->current_level - 1;
+	while (level >= -1) {
+		u64 parent;
+		if (level < ref_path->lowest_level)
+			break;
+
+		if (level >= 0) {
+			bytenr = ref_path->nodes[level];
+		} else {
+			bytenr = ref_path->extent_start;
+		}
+		BUG_ON(bytenr == 0);
+
+		parent = ref_path->nodes[level + 1];
+		ref_path->nodes[level + 1] = 0;
+		ref_path->current_level = level;
+		BUG_ON(parent == 0);
+
+		key.objectid = bytenr;
+		key.offset = parent + 1;
+		key.type = BTRFS_EXTENT_REF_KEY;
+
+		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+		BUG_ON(ret == 0);
+
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				goto next;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid == bytenr &&
+				found_key.type == BTRFS_EXTENT_REF_KEY)
+			goto found;
+next:
+		level--;
+		btrfs_release_path(extent_root, path);
+		if (need_resched()) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
+	}
+	/* reached lowest level */
+	ret = 1;
+	goto out;
+walk_up:
+	level = ref_path->current_level;
+	while (level < BTRFS_MAX_LEVEL - 1) {
+		u64 ref_objectid;
+		if (level >= 0) {
+			bytenr = ref_path->nodes[level];
+		} else {
+			bytenr = ref_path->extent_start;
+		}
+		BUG_ON(bytenr == 0);
+
+		key.objectid = bytenr;
+		key.offset = 0;
+		key.type = BTRFS_EXTENT_REF_KEY;
+
+		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0) {
+				/* the extent was freed by someone */
+				if (ref_path->lowest_level == level)
+					goto out;
+				btrfs_release_path(extent_root, path);
+				goto walk_down;
+			}
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid != bytenr ||
+				found_key.type != BTRFS_EXTENT_REF_KEY) {
+			/* the extent was freed by someone */
+			if (ref_path->lowest_level == level) {
+				ret = 1;
+				goto out;
+			}
+			btrfs_release_path(extent_root, path);
+			goto walk_down;
+		}
+found:
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_extent_ref);
+		ref_objectid = btrfs_ref_objectid(leaf, ref);
+		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+			if (first_time) {
+				level = (int)ref_objectid;
+				BUG_ON(level >= BTRFS_MAX_LEVEL);
+				ref_path->lowest_level = level;
+				ref_path->current_level = level;
+				ref_path->nodes[level] = bytenr;
+			} else {
+				WARN_ON(ref_objectid != level);
+			}
+		} else {
+			WARN_ON(level != -1);
+		}
+		first_time = 0;
+
+		if (ref_path->lowest_level == level) {
+			ref_path->owner_objectid = ref_objectid;
+			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
+		}
+
+		/*
+		 * the block is a tree root or the block isn't in a
+		 * reference counted tree.
+		 */
+		if (found_key.objectid == found_key.offset ||
+		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
+			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
+			ref_path->root_generation =
+				btrfs_ref_generation(leaf, ref);
+			if (level < 0) {
+				/* special reference from the tree log */
+				ref_path->nodes[0] = found_key.offset;
+				ref_path->current_level = 0;
+			}
+			ret = 0;
+			goto out;
+		}
+
+		level++;
+		BUG_ON(ref_path->nodes[level] != 0);
+		ref_path->nodes[level] = found_key.offset;
+		ref_path->current_level = level;
+
+		/*
+		 * the reference was created in the running transaction,
+		 * no need to continue walking up.
+		 */
+		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
+			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
+			ref_path->root_generation =
+				btrfs_ref_generation(leaf, ref);
+			ret = 0;
+			goto out;
+		}
+
+		btrfs_release_path(extent_root, path);
+		if (need_resched()) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
+	}
+	/* reached max tree level, but no tree root found. */
+	BUG();
+out:
+	mutex_unlock(&extent_root->fs_info->alloc_mutex);
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
+				struct btrfs_root *extent_root,
+				struct btrfs_ref_path *ref_path,
+				u64 extent_start)
+{
+	memset(ref_path, 0, sizeof(*ref_path));
+	ref_path->extent_start = extent_start;
+
+	return __next_ref_path(trans, extent_root, ref_path, 1);
+}
+
+static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *extent_root,
+			       struct btrfs_ref_path *ref_path)
+{
+	return __next_ref_path(trans, extent_root, ref_path, 0);
+}
+
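+/*
+ * collect the file extents of the relocation inode that cover the
+ * given extent into an array of disk_extent.  With no_fragment set,
+ * returns 1 if the data no longer fits in a single extent.
+ */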
+static int noinline get_new_locations(struct inode *reloc_inode,
+				      struct btrfs_key *extent_key,
+				      u64 offset, int no_fragment,
+				      struct disk_extent **extents,
+				      int *nr_extents)
+{
+	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	struct disk_extent *exts = *extents;
+	struct btrfs_key found_key;
+	u64 cur_pos;
+	u64 last_byte;
+	u32 nritems;
+	int nr = 0;
+	int max = *nr_extents;
+	int ret;
+
+	WARN_ON(!no_fragment && *extents);
+	if (!exts) {
+		max = 1;
+		exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
+		if (!exts)
+			return -ENOMEM;
+	}
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	cur_pos = extent_key->objectid - offset;
+	last_byte = extent_key->objectid + extent_key->offset;
+	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
+				       cur_pos, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (1) {
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.offset != cur_pos ||
+		    found_key.type != BTRFS_EXTENT_DATA_KEY ||
+		    found_key.objectid != reloc_inode->i_ino)
+			break;
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) !=
+		    BTRFS_FILE_EXTENT_REG ||
+		    btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
+			break;
+
+		if (nr == max) {
+			struct disk_extent *old = exts;
+			max *= 2;
+			exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
+			if (!exts) {
+				exts = old;
+				ret = -ENOMEM;
+				goto out;
+			}
+			memcpy(exts, old, sizeof(*exts) * nr);
+			if (old != *extents)
+				kfree(old);
+		}
+
+		exts[nr].disk_bytenr =
+			btrfs_file_extent_disk_bytenr(leaf, fi);
+		exts[nr].disk_num_bytes =
+			btrfs_file_extent_disk_num_bytes(leaf, fi);
+		exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
+		exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+		WARN_ON(exts[nr].offset > 0);
+		WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
+
+		cur_pos += exts[nr].num_bytes;
+		nr++;
+
+		if (cur_pos + offset >= last_byte)
+			break;
+
+		if (no_fragment) {
+			ret = 1;
+			goto out;
+		}
+		path->slots[0]++;
+	}
+
+	WARN_ON(cur_pos + offset > last_byte);
+	if (cur_pos + offset < last_byte) {
+		ret = -ENOENT;
+		goto out;
+	}
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	if (ret) {
+		if (exts != *extents)
+			kfree(exts);
+	} else {
+		*extents = exts;
+		*nr_extents = nr;
+	}
+	return ret;
+}
+
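+/*
+ * walk the file extent items that still point at the extent described
+ * by 'extent_key' and rewrite them to point at 'new_extents'.  The file
+ * range covered by each item is locked in the inode's io_tree while the
+ * pointers are updated and the cached extent mapping is dropped.
+ */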
+static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					struct btrfs_key *extent_key,
+					struct btrfs_key *leaf_key,
+					struct btrfs_ref_path *ref_path,
+					struct disk_extent *new_extents,
+					int nr_extents)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct inode *inode = NULL;
+	struct btrfs_key key;
+	u64 lock_start = 0;
+	u64 lock_end = 0;
+	u64 num_bytes;
+	u64 ext_offset;
+	u64 first_pos;
+	u32 nritems;
+	int nr_scanned = 0;
+	int extent_locked = 0;
+	int ret;
+
+	memcpy(&key, leaf_key, sizeof(key));
+	first_pos = INT_LIMIT(loff_t) - extent_key->offset;
+	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
+		if (key.objectid < ref_path->owner_objectid ||
+		    (key.objectid == ref_path->owner_objectid &&
+		     key.type < BTRFS_EXTENT_DATA_KEY)) {
+			key.objectid = ref_path->owner_objectid;
+			key.type = BTRFS_EXTENT_DATA_KEY;
+			key.offset = 0;
+		}
+	}
+
+	while (1) {
+		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+		if (ret < 0)
+			goto out;
+
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+next:
+		if (extent_locked && ret > 0) {
+			/*
+			 * the file extent item was modified by someone
+			 * before the extent got locked.
+			 */
+			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
+				      lock_end, GFP_NOFS);
+			extent_locked = 0;
+		}
+
+		if (path->slots[0] >= nritems) {
+			if (++nr_scanned > 2)
+				break;
+
+			BUG_ON(extent_locked);
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
+			if ((key.objectid > ref_path->owner_objectid) ||
+			    (key.objectid == ref_path->owner_objectid &&
+			     key.type > BTRFS_EXTENT_DATA_KEY) ||
+			    (key.offset >= first_pos + extent_key->offset))
+				break;
+		}
+
+		if (inode && key.objectid != inode->i_ino) {
+			BUG_ON(extent_locked);
+			btrfs_release_path(root, path);
+			mutex_unlock(&inode->i_mutex);
+			iput(inode);
+			inode = NULL;
+			continue;
+		}
+
+		if (key.type != BTRFS_EXTENT_DATA_KEY) {
+			path->slots[0]++;
+			ret = 1;
+			goto next;
+		}
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		if ((btrfs_file_extent_type(leaf, fi) !=
+		     BTRFS_FILE_EXTENT_REG) ||
+		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
+		     extent_key->objectid)) {
+			path->slots[0]++;
+			ret = 1;
+			goto next;
+		}
+
+		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+		ext_offset = btrfs_file_extent_offset(leaf, fi);
+
+		if (first_pos > key.offset - ext_offset)
+			first_pos = key.offset - ext_offset;
+
+		if (!extent_locked) {
+			lock_start = key.offset;
+			lock_end = lock_start + num_bytes - 1;
+		} else {
+			BUG_ON(lock_start != key.offset);
+			BUG_ON(lock_end - lock_start + 1 < num_bytes);
+		}
+
+		if (!inode) {
+			btrfs_release_path(root, path);
+
+			inode = btrfs_iget_locked(root->fs_info->sb,
+						  key.objectid, root);
+			if (inode->i_state & I_NEW) {
+				BTRFS_I(inode)->root = root;
+				BTRFS_I(inode)->location.objectid =
+					key.objectid;
+				BTRFS_I(inode)->location.type =
+					BTRFS_INODE_ITEM_KEY;
+				BTRFS_I(inode)->location.offset = 0;
+				btrfs_read_locked_inode(inode);
+				unlock_new_inode(inode);
+			}
+			/*
+			 * some code calls btrfs_commit_transaction while
+			 * holding the i_mutex, so we can't use mutex_lock
+			 * here.
+			 */
+			if (is_bad_inode(inode) ||
+			    !mutex_trylock(&inode->i_mutex)) {
+				iput(inode);
+				inode = NULL;
+				key.offset = (u64)-1;
+				goto skip;
+			}
+		}
+
+		if (!extent_locked) {
+			struct btrfs_ordered_extent *ordered;
+
+			btrfs_release_path(root, path);
+
+			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
+				    lock_end, GFP_NOFS);
+			ordered = btrfs_lookup_first_ordered_extent(inode,
+								    lock_end);
+			if (ordered &&
+			    ordered->file_offset <= lock_end &&
+			    ordered->file_offset + ordered->len > lock_start) {
+				unlock_extent(&BTRFS_I(inode)->io_tree,
+					      lock_start, lock_end, GFP_NOFS);
+				btrfs_start_ordered_extent(inode, ordered, 1);
+				btrfs_put_ordered_extent(ordered);
+				key.offset += num_bytes;
+				goto skip;
+			}
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+
+			mutex_lock(&BTRFS_I(inode)->extent_mutex);
+			extent_locked = 1;
+			continue;
+		}
+
+		if (nr_extents == 1) {
+			/* update extent pointer in place */
+			btrfs_set_file_extent_generation(leaf, fi,
+						trans->transid);
+			btrfs_set_file_extent_disk_bytenr(leaf, fi,
+						new_extents[0].disk_bytenr);
+			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
+						new_extents[0].disk_num_bytes);
+			ext_offset += new_extents[0].offset;
+			btrfs_set_file_extent_offset(leaf, fi, ext_offset);
+			btrfs_mark_buffer_dirty(leaf);
+
+			btrfs_drop_extent_cache(inode, key.offset,
+						key.offset + num_bytes - 1, 0);
+
+			ret = btrfs_inc_extent_ref(trans, root,
+						new_extents[0].disk_bytenr,
+						new_extents[0].disk_num_bytes,
+						leaf->start,
+						root->root_key.objectid,
+						trans->transid,
+						key.objectid);
+			BUG_ON(ret);
+
+			ret = btrfs_free_extent(trans, root,
+						extent_key->objectid,
+						extent_key->offset,
+						leaf->start,
+						btrfs_header_owner(leaf),
+						btrfs_header_generation(leaf),
+						key.objectid, 0);
+			BUG_ON(ret);
+
+			btrfs_release_path(root, path);
+			key.offset += num_bytes;
+		} else {
+			u64 alloc_hint;
+			u64 extent_len;
+			int i;
+			/*
+			 * drop the old extent pointer first, then insert the
+			 * new pointers one by one
+			 */
+			btrfs_release_path(root, path);
+			ret = btrfs_drop_extents(trans, root, inode, key.offset,
+						 key.offset + num_bytes,
+						 key.offset, &alloc_hint);
+			BUG_ON(ret);
+
+			for (i = 0; i < nr_extents; i++) {
+				if (ext_offset >= new_extents[i].num_bytes) {
+					ext_offset -= new_extents[i].num_bytes;
+					continue;
+				}
+				extent_len = min(new_extents[i].num_bytes -
+						 ext_offset, num_bytes);
+
+				ret = btrfs_insert_empty_item(trans, root,
+							      path, &key,
+							      sizeof(*fi));
+				BUG_ON(ret);
+
+				leaf = path->nodes[0];
+				fi = btrfs_item_ptr(leaf, path->slots[0],
+						struct btrfs_file_extent_item);
+				btrfs_set_file_extent_generation(leaf, fi,
+							trans->transid);
+				btrfs_set_file_extent_type(leaf, fi,
+							BTRFS_FILE_EXTENT_REG);
+				btrfs_set_file_extent_disk_bytenr(leaf, fi,
+						new_extents[i].disk_bytenr);
+				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
+						new_extents[i].disk_num_bytes);
+				btrfs_set_file_extent_num_bytes(leaf, fi,
+							extent_len);
+				ext_offset += new_extents[i].offset;
+				btrfs_set_file_extent_offset(leaf, fi,
+							ext_offset);
+				btrfs_mark_buffer_dirty(leaf);
+
+				btrfs_drop_extent_cache(inode, key.offset,
+						key.offset + extent_len - 1, 0);
+
+				ret = btrfs_inc_extent_ref(trans, root,
+						new_extents[i].disk_bytenr,
+						new_extents[i].disk_num_bytes,
+						leaf->start,
+						root->root_key.objectid,
+						trans->transid, key.objectid);
+				BUG_ON(ret);
+				btrfs_release_path(root, path);
+
+				inode_add_bytes(inode, extent_len);
+
+				ext_offset = 0;
+				num_bytes -= extent_len;
+				key.offset += extent_len;
+
+				if (num_bytes == 0)
+					break;
+			}
+			BUG_ON(i >= nr_extents);
+		}
+
+		if (extent_locked) {
+			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
+				      lock_end, GFP_NOFS);
+			extent_locked = 0;
+		}
+skip:
+		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
+		    key.offset >= first_pos + extent_key->offset)
+			break;
+
+		cond_resched();
+	}
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	if (inode) {
+		mutex_unlock(&inode->i_mutex);
+		if (extent_locked) {
+			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
+				      lock_end, GFP_NOFS);
+		}
+		iput(inode);
+	}
+	return ret;
+}
+
+int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
+			    u64 num_bytes, u64 new_bytenr)
+{
+	set_extent_bits(&root->fs_info->reloc_mapping_tree,
+			orig_bytenr, orig_bytenr + num_bytes - 1,
+			EXTENT_LOCKED, GFP_NOFS);
+	set_state_private(&root->fs_info->reloc_mapping_tree,
+			  orig_bytenr, new_bytenr);
+	return 0;
+}
+
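+/*
+ * follow the chain of relocation mappings starting at 'orig_bytenr' to
+ * find the newest location of the block.  The chain is shortened along
+ * the way so that later lookups resolve in fewer steps.  Returns -ENOENT
+ * if no mapping exists for 'orig_bytenr'.
+ */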
+int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
+			    u64 num_bytes, u64 *new_bytenr)
+{
+	u64 bytenr;
+	u64 cur_bytenr = orig_bytenr;
+	u64 prev_bytenr = orig_bytenr;
+	int ret;
+
+	while (1) {
+		ret = get_state_private(&root->fs_info->reloc_mapping_tree,
+					cur_bytenr, &bytenr);
+		if (ret)
+			break;
+		prev_bytenr = cur_bytenr;
+		cur_bytenr = bytenr;
+	}
+
+	if (orig_bytenr == cur_bytenr)
+		return -ENOENT;
+
+	if (prev_bytenr != orig_bytenr) {
+		set_state_private(&root->fs_info->reloc_mapping_tree,
+				  orig_bytenr, cur_bytenr);
+	}
+	*new_bytenr = cur_bytenr;
+	return 0;
+}
+
+void btrfs_free_reloc_mappings(struct btrfs_root *root)
+{
+	clear_extent_bits(&root->fs_info->reloc_mapping_tree,
+			  0, (u64)-1, -1, GFP_NOFS);
+}
+
+int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct extent_buffer *buf, u64 orig_start)
+{
+	int level;
+	int ret;
+
+	BUG_ON(btrfs_header_generation(buf) != trans->transid);
+	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+
+	level = btrfs_header_level(buf);
+	if (level == 0) {
+		struct btrfs_leaf_ref *ref;
+		struct btrfs_leaf_ref *orig_ref;
+
+		orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
+		if (!orig_ref)
+			return -ENOENT;
+
+		ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
+		if (!ref) {
+			btrfs_free_leaf_ref(root, orig_ref);
+			return -ENOMEM;
+		}
+
+		ref->nritems = orig_ref->nritems;
+		memcpy(ref->extents, orig_ref->extents,
+			sizeof(ref->extents[0]) * ref->nritems);
+
+		btrfs_free_leaf_ref(root, orig_ref);
+
+		ref->root_gen = trans->transid;
+		ref->bytenr = buf->start;
+		ref->owner = btrfs_header_owner(buf);
+		ref->generation = btrfs_header_generation(buf);
+		ret = btrfs_add_leaf_ref(root, ref, 0);
+		WARN_ON(ret);
+		btrfs_free_leaf_ref(root, ref);
+	}
+	return 0;
+}
+
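+/*
+ * drop the cached extent mappings in 'target_root' for every regular
+ * file extent found in 'leaf', so that stale mappings are not used once
+ * the extents have been relocated.
+ */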
+static int noinline invalidate_extent_cache(struct btrfs_root *root,
+					struct extent_buffer *leaf,
+					struct btrfs_block_group_cache *group,
+					struct btrfs_root *target_root)
+{
+	struct btrfs_key key;
+	struct inode *inode = NULL;
+	struct btrfs_file_extent_item *fi;
+	u64 num_bytes;
+	u64 skip_objectid = 0;
+	u32 nritems;
+	u32 i;
+
+	nritems = btrfs_header_nritems(leaf);
+	for (i = 0; i < nritems; i++) {
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		if (key.objectid == skip_objectid ||
+		    key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
+			continue;
+		if (!inode || inode->i_ino != key.objectid) {
+			iput(inode);
+			inode = btrfs_ilookup(target_root->fs_info->sb,
+					      key.objectid, target_root, 1);
+		}
+		if (!inode) {
+			skip_objectid = key.objectid;
+			continue;
+		}
+		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+
+		lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
+			    key.offset + num_bytes - 1, GFP_NOFS);
+		mutex_lock(&BTRFS_I(inode)->extent_mutex);
+		btrfs_drop_extent_cache(inode, key.offset,
+					key.offset + num_bytes - 1, 1);
+		mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+		unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
+			      key.offset + num_bytes - 1, GFP_NOFS);
+		cond_resched();
+	}
+	iput(inode);
+	return 0;
+}
+
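+/*
+ * for each file extent in 'leaf' that falls inside the block group being
+ * relocated, look up its new location via the relocation inode and update
+ * both the file extent item and the cached leaf ref to point at the new
+ * extent.
+ */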
+static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct extent_buffer *leaf,
+					struct btrfs_block_group_cache *group,
+					struct inode *reloc_inode)
+{
+	struct btrfs_key key;
+	struct btrfs_key extent_key;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_leaf_ref *ref;
+	struct disk_extent *new_extent;
+	u64 bytenr;
+	u64 num_bytes;
+	u32 nritems;
+	u32 i;
+	int ext_index;
+	int nr_extent;
+	int ret;
+
+	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
+	BUG_ON(!new_extent);
+
+	ref = btrfs_lookup_leaf_ref(root, leaf->start);
+	BUG_ON(!ref);
+
+	ext_index = -1;
+	nritems = btrfs_header_nritems(leaf);
+	for (i = 0; i < nritems; i++) {
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		if (bytenr == 0)
+			continue;
+
+		ext_index++;
+		if (bytenr >= group->key.objectid + group->key.offset ||
+		    bytenr + num_bytes <= group->key.objectid)
+			continue;
+
+		extent_key.objectid = bytenr;
+		extent_key.offset = num_bytes;
+		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
+		nr_extent = 1;
+		ret = get_new_locations(reloc_inode, &extent_key,
+					group->key.objectid, 1,
+					&new_extent, &nr_extent);
+		if (ret > 0)
+			continue;
+		BUG_ON(ret < 0);
+
+		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
+		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
+		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
+		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
+
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+		btrfs_set_file_extent_disk_bytenr(leaf, fi,
+						new_extent->disk_bytenr);
+		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
+						new_extent->disk_num_bytes);
+		new_extent->offset += btrfs_file_extent_offset(leaf, fi);
+		btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
+		btrfs_mark_buffer_dirty(leaf);
+
+		ret = btrfs_inc_extent_ref(trans, root,
+					new_extent->disk_bytenr,
+					new_extent->disk_num_bytes,
+					leaf->start,
+					root->root_key.objectid,
+					trans->transid, key.objectid);
+		BUG_ON(ret);
+		ret = btrfs_free_extent(trans, root,
+					bytenr, num_bytes, leaf->start,
+					btrfs_header_owner(leaf),
+					btrfs_header_generation(leaf),
+					key.objectid, 0);
+		BUG_ON(ret);
+		cond_resched();
+	}
+	kfree(new_extent);
+	BUG_ON(ext_index + 1 != ref->nritems);
+	btrfs_free_leaf_ref(root, ref);
+	return 0;
+}
+
+int btrfs_free_reloc_root(struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+
+	if (root->reloc_root) {
+		reloc_root = root->reloc_root;
+		root->reloc_root = NULL;
+		list_add(&reloc_root->dead_list,
+			 &root->fs_info->dead_reloc_roots);
+	}
+	return 0;
+}
+
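+/*
+ * drop every reloc root queued on fs_info->dead_reloc_roots.  Each root
+ * is dropped across multiple transactions so the work can be throttled,
+ * and its root item is deleted from the tree of tree roots once the
+ * snapshot is fully removed.
+ */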
+int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *reloc_root;
+	struct btrfs_root *prev_root = NULL;
+	struct list_head dead_roots;
+	int ret;
+	unsigned long nr;
+
+	INIT_LIST_HEAD(&dead_roots);
+	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
+
+	while (!list_empty(&dead_roots)) {
+		reloc_root = list_entry(dead_roots.prev,
+					struct btrfs_root, dead_list);
+		list_del_init(&reloc_root->dead_list);
+
+		BUG_ON(reloc_root->commit_root != NULL);
+		while (1) {
+			trans = btrfs_join_transaction(root, 1);
+			BUG_ON(!trans);
+
+			mutex_lock(&root->fs_info->drop_mutex);
+			ret = btrfs_drop_snapshot(trans, reloc_root);
+			if (ret != -EAGAIN)
+				break;
+			mutex_unlock(&root->fs_info->drop_mutex);
+
+			nr = trans->blocks_used;
+			ret = btrfs_end_transaction(trans, root);
+			BUG_ON(ret);
+			btrfs_btree_balance_dirty(root, nr);
+		}
+
+		free_extent_buffer(reloc_root->node);
+
+		ret = btrfs_del_root(trans, root->fs_info->tree_root,
+				     &reloc_root->root_key);
+		BUG_ON(ret);
+		mutex_unlock(&root->fs_info->drop_mutex);
+
+		nr = trans->blocks_used;
+		ret = btrfs_end_transaction(trans, root);
+		BUG_ON(ret);
+		btrfs_btree_balance_dirty(root, nr);
+
+		kfree(prev_root);
+		prev_root = reloc_root;
+	}
+	if (prev_root) {
+		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
+		kfree(prev_root);
+	}
+	return 0;
+}
+
+int btrfs_add_dead_reloc_root(struct btrfs_root *root)
+{
+	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
+	return 0;
+}
+
+int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key location;
+	int found;
+	int ret;
+
+	mutex_lock(&root->fs_info->tree_reloc_mutex);
+	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
+	BUG_ON(ret);
+	found = !list_empty(&root->fs_info->dead_reloc_roots);
+	mutex_unlock(&root->fs_info->tree_reloc_mutex);
+
+	if (found) {
+		trans = btrfs_start_transaction(root, 1);
+		BUG_ON(!trans);
+		ret = btrfs_commit_transaction(trans, root);
+		BUG_ON(ret);
+	}
+
+	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
+	location.offset = (u64)-1;
+	location.type = BTRFS_ROOT_ITEM_KEY;
+
+	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
+	BUG_ON(!reloc_root);
+	btrfs_orphan_cleanup(reloc_root);
+	return 0;
+}
+
+static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+	struct extent_buffer *eb;
+	struct btrfs_root_item *root_item;
+	struct btrfs_key root_key;
+	int ret;
+
+	BUG_ON(!root->ref_cows);
+	if (root->reloc_root)
+		return 0;
+
+	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
+	BUG_ON(!root_item);
+
+	ret = btrfs_copy_root(trans, root, root->commit_root,
+			      &eb, BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(ret);
+
+	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	root_key.offset = root->root_key.objectid;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+
+	memcpy(root_item, &root->root_item, sizeof(*root_item));
+	btrfs_set_root_refs(root_item, 0);
+	btrfs_set_root_bytenr(root_item, eb->start);
+	btrfs_set_root_level(root_item, btrfs_header_level(eb));
+	memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress));
+	root_item->drop_level = 0;
+
+	btrfs_tree_unlock(eb);
+	free_extent_buffer(eb);
+
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+				&root_key, root_item);
+	BUG_ON(ret);
+	kfree(root_item);
+
+	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
+						 &root_key);
+	BUG_ON(!reloc_root);
+	reloc_root->last_trans = trans->transid;
+	reloc_root->commit_root = NULL;
+	reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
+
+	root->reloc_root = reloc_root;
+	return 0;
+}
+
+/*
+ * Core function of space balance.
+ *
+ * The idea is to use reloc trees to relocate tree blocks in reference
+ * counted roots. There is one reloc tree for each subvol, and all reloc
+ * trees share the same key objectid. Reloc trees are snapshots of the
+ * latest committed roots of subvols (root->commit_root). To relocate a
+ * tree block referenced by a subvol, the code COWs the block through the
+ * reloc tree, then updates the pointer in the subvol to point to the new
+ * block. Since all reloc trees share the same key objectid, we can easily
+ * do special handling to share tree blocks between reloc trees. Once a
+ * tree block has been COWed in one reloc tree, we can use the result when
+ * the same block is COWed again through other reloc trees.
+ */
+static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct btrfs_key *first_key,
+				      struct btrfs_ref_path *ref_path,
+				      struct btrfs_block_group_cache *group,
+				      struct inode *reloc_inode)
+{
+	struct btrfs_root *reloc_root;
+	struct extent_buffer *eb = NULL;
+	struct btrfs_key *keys;
+	u64 *nodes;
+	int level;
+	int lowest_merge;
+	int lowest_level = 0;
+	int update_refs;
+	int ret;
+
+	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
+		lowest_level = ref_path->owner_objectid;
+
+	if (is_cowonly_root(ref_path->root_objectid)) {
+		path->lowest_level = lowest_level;
+		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
+		BUG_ON(ret < 0);
+		path->lowest_level = 0;
+		btrfs_release_path(root, path);
+		return 0;
+	}
+
+	keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS);
+	BUG_ON(!keys);
+	nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS);
+	BUG_ON(!nodes);
+
+	mutex_lock(&root->fs_info->tree_reloc_mutex);
+	ret = init_reloc_tree(trans, root);
+	BUG_ON(ret);
+	reloc_root = root->reloc_root;
+
+	path->lowest_level = lowest_level;
+	ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0);
+	BUG_ON(ret);
+	/*
+	 * get relocation mapping for tree blocks in the path
+	 */
+	lowest_merge = BTRFS_MAX_LEVEL;
+	for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
+		u64 new_bytenr;
+		eb = path->nodes[level];
+		if (!eb || eb == reloc_root->node)
+			continue;
+		ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len,
+					      &new_bytenr);
+		if (ret)
+			continue;
+		if (level == 0)
+			btrfs_item_key_to_cpu(eb, &keys[level], 0);
+		else
+			btrfs_node_key_to_cpu(eb, &keys[level], 0);
+		nodes[level] = new_bytenr;
+		lowest_merge = level;
+	}
+
+	update_refs = 0;
+	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+		eb = path->nodes[0];
+		if (btrfs_header_generation(eb) < trans->transid)
+			update_refs = 1;
+	}
+
+	btrfs_release_path(reloc_root, path);
+	/*
+	 * merge tree blocks that have already been relocated in other
+	 * reloc trees
+	 */
+	if (lowest_merge != BTRFS_MAX_LEVEL) {
+		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
+				       lowest_merge);
+		BUG_ON(ret < 0);
+	}
+	/*
+	 * cow any tree blocks that still haven't been relocated
+	 */
+	ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1);
+	BUG_ON(ret);
+	/*
+	 * if we are relocating a data block group, update extent pointers
+	 * in the newly created tree leaf.
+	 */
+	eb = path->nodes[0];
+	if (update_refs && nodes[0] != eb->start) {
+		ret = replace_extents_in_leaf(trans, reloc_root, eb, group,
+					      reloc_inode);
+		BUG_ON(ret);
+	}
+
+	memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL);
+	memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL);
+	for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
+		eb = path->nodes[level];
+		if (!eb || eb == reloc_root->node)
+			continue;
+		BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID);
+		nodes[level] = eb->start;
+		if (level == 0)
+			btrfs_item_key_to_cpu(eb, &keys[level], 0);
+		else
+			btrfs_node_key_to_cpu(eb, &keys[level], 0);
+	}
+
+	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+		eb = path->nodes[0];
+		extent_buffer_get(eb);
+	}
+	btrfs_release_path(reloc_root, path);
+	/*
+	 * replace tree blocks in the fs tree with tree blocks in
+	 * the reloc tree.
+	 */
+	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
+	BUG_ON(ret < 0);
+
+	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+		ret = invalidate_extent_cache(reloc_root, eb, group, root);
+		BUG_ON(ret);
+		free_extent_buffer(eb);
+	}
+	mutex_unlock(&root->fs_info->tree_reloc_mutex);
+
+	path->lowest_level = 0;
+	kfree(nodes);
+	kfree(keys);
+	return 0;
+}
+
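+/*
+ * relocate a single tree block.  The allocation mutex is taken while
+ * relocating blocks that belong to the COW-only extent, chunk and dev
+ * trees.
+ */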
+static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					struct btrfs_key *first_key,
+					struct btrfs_ref_path *ref_path)
+{
+	int ret;
+	int needs_lock = 0;
+
+	if (root == root->fs_info->extent_root ||
+	    root == root->fs_info->chunk_root ||
+	    root == root->fs_info->dev_root) {
+		needs_lock = 1;
+		mutex_lock(&root->fs_info->alloc_mutex);
+	}
+
+	ret = relocate_one_path(trans, root, path, first_key,
+				ref_path, NULL, NULL);
+	BUG_ON(ret);
+
+	if (root == root->fs_info->extent_root)
+		btrfs_extent_post_op(trans, root);
+	if (needs_lock)
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
+	return 0;
+}
+
+static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *extent_root,
+				    struct btrfs_path *path,
+				    struct btrfs_key *extent_key)
+{
+	int ret;
+
+	mutex_lock(&extent_root->fs_info->alloc_mutex);
+	ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
+	if (ret)
+		goto out;
+	ret = btrfs_del_item(trans, extent_root, path);
+out:
+	btrfs_release_path(extent_root, path);
+	mutex_unlock(&extent_root->fs_info->alloc_mutex);
+	return ret;
+}
+
+static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
+						struct btrfs_ref_path *ref_path)
+{
+	struct btrfs_key root_key;
+
+	root_key.objectid = ref_path->root_objectid;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	if (is_cowonly_root(ref_path->root_objectid))
+		root_key.offset = 0;
+	else
+		root_key.offset = (u64)-1;
+
+	return btrfs_read_fs_root_no_name(fs_info, &root_key);
+}
+
+static int noinline relocate_one_extent(struct btrfs_root *extent_root,
+					struct btrfs_path *path,
+					struct btrfs_key *extent_key,
+					struct btrfs_block_group_cache *group,
+					struct inode *reloc_inode, int pass)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *found_root;
+	struct btrfs_ref_path *ref_path = NULL;
+	struct disk_extent *new_extents = NULL;
+	int nr_extents = 0;
+	int loops;
+	int ret;
+	int level;
+	struct btrfs_key first_key;
+	u64 prev_block = 0;
+
+	mutex_unlock(&extent_root->fs_info->alloc_mutex);
+
+	trans = btrfs_start_transaction(extent_root, 1);
+	BUG_ON(!trans);
+
+	if (extent_key->objectid == 0) {
+		ret = del_extent_zero(trans, extent_root, path, extent_key);
+		goto out;
+	}
+
+	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
+	if (!ref_path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (loops = 0; ; loops++) {
+		if (loops == 0) {
+			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
+						   extent_key->objectid);
+		} else {
+			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
+		}
+		if (ret < 0)
+			goto out;
+		if (ret > 0)
+			break;
+
+		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
+		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+			continue;
+
+		found_root = read_ref_root(extent_root->fs_info, ref_path);
+		BUG_ON(!found_root);
+		/*
+		 * for a reference counted tree, only process reference paths
+		 * rooted at the latest committed root.
+		 */
+		if (found_root->ref_cows &&
+		    ref_path->root_generation != found_root->root_key.offset)
+			continue;
+
+		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+			if (pass == 0) {
+				/*
+				 * copy data extents to new locations
+				 */
+				u64 group_start = group->key.objectid;
+				ret = relocate_data_extent(reloc_inode,
+							   extent_key,
+							   group_start);
+				if (ret < 0)
+					goto out;
+				break;
+			}
+			level = 0;
+		} else {
+			level = ref_path->owner_objectid;
+		}
+
+		if (prev_block != ref_path->nodes[level]) {
+			struct extent_buffer *eb;
+			u64 block_start = ref_path->nodes[level];
+			u64 block_size = btrfs_level_size(found_root, level);
+
+			eb = read_tree_block(found_root, block_start,
+					     block_size, 0);
+			btrfs_tree_lock(eb);
+			BUG_ON(level != btrfs_header_level(eb));
+
+			if (level == 0)
+				btrfs_item_key_to_cpu(eb, &first_key, 0);
+			else
+				btrfs_node_key_to_cpu(eb, &first_key, 0);
+
+			btrfs_tree_unlock(eb);
+			free_extent_buffer(eb);
+			prev_block = block_start;
+		}
+
+		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+		    pass >= 2) {
+			/*
+			 * use the fallback method to process the remaining
+			 * references.
+			 */
+			if (!new_extents) {
+				u64 group_start = group->key.objectid;
+				ret = get_new_locations(reloc_inode,
+							extent_key,
+							group_start, 0,
+							&new_extents,
+							&nr_extents);
+				if (ret < 0)
+					goto out;
+			}
+			btrfs_record_root_in_trans(found_root);
+			ret = replace_one_extent(trans, found_root,
+						path, extent_key,
+						&first_key, ref_path,
+						new_extents, nr_extents);
+			if (ret < 0)
+				goto out;
+			continue;
+		}
+
+		btrfs_record_root_in_trans(found_root);
+		if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+			ret = relocate_tree_block(trans, found_root, path,
+						  &first_key, ref_path);
+		} else {
+			/*
+			 * try to update data extent references while
+			 * keeping metadata shared between snapshots.
+			 */
+			ret = relocate_one_path(trans, found_root, path,
+						&first_key, ref_path,
+						group, reloc_inode);
+		}
+		if (ret < 0)
+			goto out;
+	}
+	ret = 0;
+out:
+	btrfs_end_transaction(trans, extent_root);
+	kfree(new_extents);
+	kfree(ref_path);
+	mutex_lock(&extent_root->fs_info->alloc_mutex);
+	return ret;
+}
+
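+/*
+ * pick the block group flags to use when chunks are reallocated during
+ * relocation.  With a single device, RAID0 falls back to single device
+ * chunks and RAID1/RAID10 fall back to DUP; with multiple devices, DUP
+ * is upgraded to RAID1 and single device chunks to RAID0.
+ */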
+static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
+{
+	u64 num_devices;
+	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
+		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
+
+	num_devices = root->fs_info->fs_devices->num_devices;
+	if (num_devices == 1) {
+		stripped |= BTRFS_BLOCK_GROUP_DUP;
+		stripped = flags & ~stripped;
+
+		/* turn raid0 into single device chunks */
+		if (flags & BTRFS_BLOCK_GROUP_RAID0)
+			return stripped;
+
+		/* turn mirroring into duplication */
+		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+			     BTRFS_BLOCK_GROUP_RAID10))
+			return stripped | BTRFS_BLOCK_GROUP_DUP;
+		return flags;
+	} else {
+		/* they already had raid on here, just return */
+		if (flags & stripped)
+			return flags;
+
+		stripped |= BTRFS_BLOCK_GROUP_DUP;
+		stripped = flags & ~stripped;
+
+		/* switch duplicated blocks with raid1 */
+		if (flags & BTRFS_BLOCK_GROUP_DUP)
+			return stripped | BTRFS_BLOCK_GROUP_RAID1;
+
+		/* turn single device chunks into raid0 */
+		return stripped | BTRFS_BLOCK_GROUP_RAID0;
+	}
+	return flags;
+}
+
+int __alloc_chunk_for_shrink(struct btrfs_root *root,
+		     struct btrfs_block_group_cache *shrink_block_group,
+		     int force)
+{
+	struct btrfs_trans_handle *trans;
+	u64 new_alloc_flags;
+	u64 calc;
+
+	spin_lock(&shrink_block_group->lock);
+	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
+		spin_unlock(&shrink_block_group->lock);
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
+		trans = btrfs_start_transaction(root, 1);
+		mutex_lock(&root->fs_info->alloc_mutex);
+		spin_lock(&shrink_block_group->lock);
+
+		new_alloc_flags = update_block_group_flags(root,
+						   shrink_block_group->flags);
+		if (new_alloc_flags != shrink_block_group->flags) {
+			calc =
+			     btrfs_block_group_used(&shrink_block_group->item);
+		} else {
+			calc = shrink_block_group->key.offset;
+		}
+		spin_unlock(&shrink_block_group->lock);
+
+		do_chunk_alloc(trans, root->fs_info->extent_root,
+			       calc + 2 * 1024 * 1024, new_alloc_flags, force);
+
+		mutex_unlock(&root->fs_info->alloc_mutex);
+		btrfs_end_transaction(trans, root);
+		mutex_lock(&root->fs_info->alloc_mutex);
+	} else
+		spin_unlock(&shrink_block_group->lock);
+	return 0;
+}
+
+static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 u64 objectid, u64 size)
+{
+	struct btrfs_path *path;
+	struct btrfs_inode_item *item;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+	if (ret)
+		goto out;
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
+	btrfs_set_inode_generation(leaf, item, 1);
+	btrfs_set_inode_size(leaf, item, size);
+	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
+	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
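+/*
+ * create an inode in the data relocation tree with a file extent item
+ * covering the size of the block group.  The inode is added to the
+ * orphan list so it is cleaned up if relocation is interrupted; data
+ * extents are copied into it while the block group is relocated.
+ */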
+static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
+					struct btrfs_block_group_cache *group)
+{
+	struct inode *inode = NULL;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	struct btrfs_key root_key;
+	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
+	int err = 0;
+
+	root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root_key.offset = (u64)-1;
+	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+	if (IS_ERR(root))
+		return ERR_CAST(root);
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
+	if (err)
+		goto out;
+
+	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
+	BUG_ON(err);
+
+	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
+				       group->key.offset, 0);
+	BUG_ON(err);
+
+	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
+	if (inode->i_state & I_NEW) {
+		BTRFS_I(inode)->root = root;
+		BTRFS_I(inode)->location.objectid = objectid;
+		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+		BTRFS_I(inode)->location.offset = 0;
+		btrfs_read_locked_inode(inode);
+		unlock_new_inode(inode);
+		BUG_ON(is_bad_inode(inode));
+	} else {
+		BUG_ON(1);
+	}
+
+	err = btrfs_orphan_add(trans, inode);
+out:
+	btrfs_end_transaction(trans, root);
+	if (err) {
+		if (inode)
+			iput(inode);
+		inode = ERR_PTR(err);
+	}
+	return inode;
+}
+
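+/*
+ * relocate all extents in the block group starting at 'group_start'.
+ * Pass 0 copies data extents into the relocation inode; later passes
+ * update the remaining references.  The scan is repeated until no
+ * extents are found in the block group.
+ */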
+int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct btrfs_fs_info *info = root->fs_info;
+	struct extent_buffer *leaf;
+	struct inode *reloc_inode;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_key key;
+	u64 cur_byte;
+	u64 total_found;
+	u32 nritems;
+	int ret;
+	int progress;
+	int pass = 0;
+
+	root = root->fs_info->extent_root;
+
+	block_group = btrfs_lookup_block_group(info, group_start);
+	BUG_ON(!block_group);
+
+	printk("btrfs relocating block group %llu flags %llu\n",
+	       (unsigned long long)block_group->key.objectid,
+	       (unsigned long long)block_group->flags);
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	reloc_inode = create_reloc_inode(info, block_group);
+	BUG_ON(IS_ERR(reloc_inode));
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+
+	__alloc_chunk_for_shrink(root, block_group, 1);
+	block_group->ro = 1;
+	block_group->space_info->total_bytes -= block_group->key.offset;
+
+	mutex_unlock(&root->fs_info->alloc_mutex);
+
+	btrfs_start_delalloc_inodes(info->tree_root);
+	btrfs_wait_ordered_extents(info->tree_root, 0);
+again:
+	total_found = 0;
+	progress = 0;
+	key.objectid = block_group->key.objectid;
+	key.offset = 0;
+	key.type = 0;
+	cur_byte = key.objectid;
+
+	trans = btrfs_start_transaction(info->tree_root, 1);
+	btrfs_commit_transaction(trans, info->tree_root);
+
+	mutex_lock(&root->fs_info->cleaner_mutex);
+	btrfs_clean_old_snapshots(info->tree_root);
+	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
+	mutex_unlock(&root->fs_info->cleaner_mutex);
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+
+	while(1) {
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+next:
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret == 1) {
+				ret = 0;
+				break;
+			}
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+		if (key.objectid >= block_group->key.objectid +
+		    block_group->key.offset)
+			break;
+
+		if (progress && need_resched()) {
+			btrfs_release_path(root, path);
+			mutex_unlock(&root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&root->fs_info->alloc_mutex);
+			progress = 0;
+			continue;
+		}
+		progress = 1;
+
+		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
+		    key.objectid + key.offset <= cur_byte) {
+			path->slots[0]++;
+			goto next;
+		}
+
+		total_found++;
+		cur_byte = key.objectid + key.offset;
+		btrfs_release_path(root, path);
+
+		__alloc_chunk_for_shrink(root, block_group, 0);
+		ret = relocate_one_extent(root, path, &key, block_group,
+					  reloc_inode, pass);
+		BUG_ON(ret < 0);
+
+		key.objectid = cur_byte;
+		key.type = 0;
+		key.offset = 0;
+	}
+
+	btrfs_release_path(root, path);
+	mutex_unlock(&root->fs_info->alloc_mutex);
+
+	if (pass == 0) {
+		btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
+		invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
+		WARN_ON(reloc_inode->i_mapping->nrpages);
+	}
+
+	if (total_found > 0) {
+		printk("btrfs found %llu extents in pass %d\n",
+		       (unsigned long long)total_found, pass);
+		pass++;
+		goto again;
+	}
+
+	/* delete reloc_inode */
+	iput(reloc_inode);
+
+	/* unpin extents in this range */
+	trans = btrfs_start_transaction(info->tree_root, 1);
+	btrfs_commit_transaction(trans, info->tree_root);
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+
+	spin_lock(&block_group->lock);
+	WARN_ON(block_group->pinned > 0);
+	WARN_ON(block_group->reserved > 0);
+	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
+	spin_unlock(&block_group->lock);
+	ret = 0;
+out:
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	btrfs_free_path(path);
+	return ret;
+}
+
+int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
+			   struct btrfs_key *key)
+{
+	int ret = 0;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	int slot;
+
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	while(1) {
+		slot = path->slots[0];
+		leaf = path->nodes[0];
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto out;
+			break;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		if (found_key.objectid >= key->objectid &&
+		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+			ret = 0;
+			goto out;
+		}
+		path->slots[0]++;
+	}
+	ret = -ENOENT;
+out:
+	return ret;
+}
+
+int btrfs_free_block_groups(struct btrfs_fs_info *info)
+{
+	struct btrfs_block_group_cache *block_group;
+	struct rb_node *n;
+
+	mutex_lock(&info->alloc_mutex);
+	spin_lock(&info->block_group_cache_lock);
+	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
+		block_group = rb_entry(n, struct btrfs_block_group_cache,
+				       cache_node);
+
+		spin_unlock(&info->block_group_cache_lock);
+		btrfs_remove_free_space_cache(block_group);
+		spin_lock(&info->block_group_cache_lock);
+
+		rb_erase(&block_group->cache_node,
+			 &info->block_group_cache_tree);
+		spin_lock(&block_group->space_info->lock);
+		list_del(&block_group->list);
+		spin_unlock(&block_group->space_info->lock);
+		kfree(block_group);
+	}
+	spin_unlock(&info->block_group_cache_lock);
+	mutex_unlock(&info->alloc_mutex);
+	return 0;
+}
+
+int btrfs_read_block_groups(struct btrfs_root *root)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *space_info;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+
+	root = info->extent_root;
+	key.objectid = 0;
+	key.offset = 0;
+	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	mutex_lock(&root->fs_info->alloc_mutex);
+	while(1) {
+		ret = find_first_block_group(root, path, &key);
+		if (ret > 0) {
+			ret = 0;
+			goto error;
+		}
+		if (ret != 0)
+			goto error;
+
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		cache = kzalloc(sizeof(*cache), GFP_NOFS);
+		if (!cache) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		spin_lock_init(&cache->lock);
+		INIT_LIST_HEAD(&cache->list);
+		read_extent_buffer(leaf, &cache->item,
+				   btrfs_item_ptr_offset(leaf, path->slots[0]),
+				   sizeof(cache->item));
+		memcpy(&cache->key, &found_key, sizeof(found_key));
+
+		key.objectid = found_key.objectid + found_key.offset;
+		btrfs_release_path(root, path);
+		cache->flags = btrfs_block_group_flags(&cache->item);
+
+		ret = update_space_info(info, cache->flags, found_key.offset,
+					btrfs_block_group_used(&cache->item),
+					&space_info);
+		BUG_ON(ret);
+		cache->space_info = space_info;
+		spin_lock(&space_info->lock);
+		list_add(&cache->list, &space_info->block_groups);
+		spin_unlock(&space_info->lock);
+
+		ret = btrfs_add_block_group_cache(root->fs_info, cache);
+		BUG_ON(ret);
+
+		set_avail_alloc_bits(root->fs_info, cache->flags);
+	}
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	return ret;
+}
+
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, u64 bytes_used,
+			   u64 type, u64 chunk_objectid, u64 chunk_offset,
+			   u64 size)
+{
+	int ret;
+	struct btrfs_root *extent_root;
+	struct btrfs_block_group_cache *cache;
+
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	extent_root = root->fs_info->extent_root;
+
+	root->fs_info->last_trans_new_blockgroup = trans->transid;
+
+	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	if (!cache)
+		return -ENOMEM;
+
+	cache->key.objectid = chunk_offset;
+	cache->key.offset = size;
+	spin_lock_init(&cache->lock);
+	INIT_LIST_HEAD(&cache->list);
+	btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+
+	btrfs_set_block_group_used(&cache->item, bytes_used);
+	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
+	cache->flags = type;
+	btrfs_set_block_group_flags(&cache->item, type);
+
+	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
+				&cache->space_info);
+	BUG_ON(ret);
+	spin_lock(&cache->space_info->lock);
+	list_add(&cache->list, &cache->space_info->block_groups);
+	spin_unlock(&cache->space_info->lock);
+
+	ret = btrfs_add_block_group_cache(root->fs_info, cache);
+	BUG_ON(ret);
+
+	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
+				sizeof(cache->item));
+	BUG_ON(ret);
+
+	finish_current_insert(trans, extent_root);
+	ret = del_pending_extents(trans, extent_root);
+	BUG_ON(ret);
+	set_avail_alloc_bits(extent_root->fs_info, type);
+
+	return 0;
+}
+
+int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 group_start)
+{
+	struct btrfs_path *path;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_key key;
+	int ret;
+
+	BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+	root = root->fs_info->extent_root;
+
+	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
+	BUG_ON(!block_group);
+
+	memcpy(&key, &block_group->key, sizeof(key));
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	btrfs_remove_free_space_cache(block_group);
+	rb_erase(&block_group->cache_node,
+		 &root->fs_info->block_group_cache_tree);
+	spin_lock(&block_group->space_info->lock);
+	list_del(&block_group->list);
+	spin_unlock(&block_group->space_info->lock);
+
+	/*
+	memset(shrink_block_group, 0, sizeof(*shrink_block_group));
+	kfree(shrink_block_group);
+	*/
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0)
+		ret = -EIO;
+	if (ret < 0)
+		goto out;
+
+	ret = btrfs_del_item(trans, root, path);
+out:
+	btrfs_free_path(path);
+	return ret;
+}

+ 3416 - 0
fs/btrfs/extent_io.c

@@ -0,0 +1,3416 @@
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/bio.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "extent_io.h"
+#include "extent_map.h"
+#include "compat.h"
+#include "ctree.h"
+#include "btrfs_inode.h"
+
+/* temporary define until extent_map moves out of btrfs */
+struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+				       unsigned long extra_flags,
+				       void (*ctor)(void *, struct kmem_cache *,
+						    unsigned long));
+
+static struct kmem_cache *extent_state_cache;
+static struct kmem_cache *extent_buffer_cache;
+
+static LIST_HEAD(buffers);
+static LIST_HEAD(states);
+
+#ifdef LEAK_DEBUG
+static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
+#define BUFFER_LRU_MAX 64
+
+struct tree_entry {
+	u64 start;
+	u64 end;
+	struct rb_node rb_node;
+};
+
+struct extent_page_data {
+	struct bio *bio;
+	struct extent_io_tree *tree;
+	get_extent_t *get_extent;
+};
+
+int __init extent_io_init(void)
+{
+	extent_state_cache = btrfs_cache_create("extent_state",
+					    sizeof(struct extent_state), 0,
+					    NULL);
+	if (!extent_state_cache)
+		return -ENOMEM;
+
+	extent_buffer_cache = btrfs_cache_create("extent_buffers",
+					    sizeof(struct extent_buffer), 0,
+					    NULL);
+	if (!extent_buffer_cache)
+		goto free_state_cache;
+	return 0;
+
+free_state_cache:
+	kmem_cache_destroy(extent_state_cache);
+	return -ENOMEM;
+}
+
+void extent_io_exit(void)
+{
+	struct extent_state *state;
+	struct extent_buffer *eb;
+
+	while (!list_empty(&states)) {
+		state = list_entry(states.next, struct extent_state, leak_list);
+		printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
+		list_del(&state->leak_list);
+		kmem_cache_free(extent_state_cache, state);
+
+	}
+
+	while (!list_empty(&buffers)) {
+		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+		printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs));
+		list_del(&eb->leak_list);
+		kmem_cache_free(extent_buffer_cache, eb);
+	}
+	if (extent_state_cache)
+		kmem_cache_destroy(extent_state_cache);
+	if (extent_buffer_cache)
+		kmem_cache_destroy(extent_buffer_cache);
+}
+
+void extent_io_tree_init(struct extent_io_tree *tree,
+			  struct address_space *mapping, gfp_t mask)
+{
+	tree->state.rb_node = NULL;
+	tree->buffer.rb_node = NULL;
+	tree->ops = NULL;
+	tree->dirty_bytes = 0;
+	spin_lock_init(&tree->lock);
+	spin_lock_init(&tree->buffer_lock);
+	tree->mapping = mapping;
+}
+EXPORT_SYMBOL(extent_io_tree_init);
+
+struct extent_state *alloc_extent_state(gfp_t mask)
+{
+	struct extent_state *state;
+#ifdef LEAK_DEBUG
+	unsigned long flags;
+#endif
+
+	state = kmem_cache_alloc(extent_state_cache, mask);
+	if (!state)
+		return state;
+	state->state = 0;
+	state->private = 0;
+	state->tree = NULL;
+#ifdef LEAK_DEBUG
+	spin_lock_irqsave(&leak_lock, flags);
+	list_add(&state->leak_list, &states);
+	spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+	atomic_set(&state->refs, 1);
+	init_waitqueue_head(&state->wq);
+	return state;
+}
+EXPORT_SYMBOL(alloc_extent_state);
+
+void free_extent_state(struct extent_state *state)
+{
+	if (!state)
+		return;
+	if (atomic_dec_and_test(&state->refs)) {
+#ifdef LEAK_DEBUG
+		unsigned long flags;
+#endif
+		WARN_ON(state->tree);
+#ifdef LEAK_DEBUG
+		spin_lock_irqsave(&leak_lock, flags);
+		list_del(&state->leak_list);
+		spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+		kmem_cache_free(extent_state_cache, state);
+	}
+}
+EXPORT_SYMBOL(free_extent_state);
+
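+/*
+ * insert 'node' into the rb tree of [start, end] ranges keyed by 'offset'.
+ * Returns the existing node if 'offset' already falls inside an entry in
+ * the tree, or NULL once the node has been linked in.
+ */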
+static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
+				   struct rb_node *node)
+{
+	struct rb_node ** p = &root->rb_node;
+	struct rb_node * parent = NULL;
+	struct tree_entry *entry;
+
+	while(*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct tree_entry, rb_node);
+
+		if (offset < entry->start)
+			p = &(*p)->rb_left;
+		else if (offset > entry->end)
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	entry = rb_entry(node, struct tree_entry, rb_node);
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
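+/*
+ * search the state tree for the entry containing 'offset'.  If no entry
+ * contains it, 'prev_ret' is set to the first entry that ends at or after
+ * 'offset' and 'next_ret' to the last entry that starts at or before it.
+ */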
+static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
+				     struct rb_node **prev_ret,
+				     struct rb_node **next_ret)
+{
+	struct rb_root *root = &tree->state;
+	struct rb_node * n = root->rb_node;
+	struct rb_node *prev = NULL;
+	struct rb_node *orig_prev = NULL;
+	struct tree_entry *entry;
+	struct tree_entry *prev_entry = NULL;
+
+	while(n) {
+		entry = rb_entry(n, struct tree_entry, rb_node);
+		prev = n;
+		prev_entry = entry;
+
+		if (offset < entry->start)
+			n = n->rb_left;
+		else if (offset > entry->end)
+			n = n->rb_right;
+		else {
+			return n;
+		}
+	}
+
+	if (prev_ret) {
+		orig_prev = prev;
+		while(prev && offset > prev_entry->end) {
+			prev = rb_next(prev);
+			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+		}
+		*prev_ret = prev;
+		prev = orig_prev;
+	}
+
+	if (next_ret) {
+		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+		while(prev && offset < prev_entry->start) {
+			prev = rb_prev(prev);
+			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+		}
+		*next_ret = prev;
+	}
+	return NULL;
+}
+
+static inline struct rb_node *tree_search(struct extent_io_tree *tree,
+					  u64 offset)
+{
+	struct rb_node *prev = NULL;
+	struct rb_node *ret;
+
+	ret = __etree_search(tree, offset, &prev, NULL);
+	if (!ret) {
+		return prev;
+	}
+	return ret;
+}
+
+static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
+					  u64 offset, struct rb_node *node)
+{
+	struct rb_root *root = &tree->buffer;
+	struct rb_node ** p = &root->rb_node;
+	struct rb_node * parent = NULL;
+	struct extent_buffer *eb;
+
+	while(*p) {
+		parent = *p;
+		eb = rb_entry(parent, struct extent_buffer, rb_node);
+
+		if (offset < eb->start)
+			p = &(*p)->rb_left;
+		else if (offset > eb->start)
+			p = &(*p)->rb_right;
+		else
+			return eb;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
+					   u64 offset)
+{
+	struct rb_root *root = &tree->buffer;
+	struct rb_node * n = root->rb_node;
+	struct extent_buffer *eb;
+
+	while(n) {
+		eb = rb_entry(n, struct extent_buffer, rb_node);
+		if (offset < eb->start)
+			n = n->rb_left;
+		else if (offset > eb->start)
+			n = n->rb_right;
+		else
+			return eb;
+	}
+	return NULL;
+}
+
+/*
+ * utility function to look for merge candidates inside a given range.
+ * Any extents with matching state are merged together into a single
+ * extent in the tree.  Extents with EXTENT_IOBITS in their state field
+ * are not merged because the end_io handlers need to be able to do
+ * operations on them without sleeping (or doing allocations/splits).
+ *
+ * This should be called with the tree lock held.
+ */
+static int merge_state(struct extent_io_tree *tree,
+		       struct extent_state *state)
+{
+	struct extent_state *other;
+	struct rb_node *other_node;
+
+	if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
+		return 0;
+
+	other_node = rb_prev(&state->rb_node);
+	if (other_node) {
+		other = rb_entry(other_node, struct extent_state, rb_node);
+		if (other->end == state->start - 1 &&
+		    other->state == state->state) {
+			state->start = other->start;
+			other->tree = NULL;
+			rb_erase(&other->rb_node, &tree->state);
+			free_extent_state(other);
+		}
+	}
+	other_node = rb_next(&state->rb_node);
+	if (other_node) {
+		other = rb_entry(other_node, struct extent_state, rb_node);
+		if (other->start == state->end + 1 &&
+		    other->state == state->state) {
+			other->start = state->start;
+			state->tree = NULL;
+			rb_erase(&state->rb_node, &tree->state);
+			free_extent_state(state);
+		}
+	}
+	return 0;
+}
+
+static void set_state_cb(struct extent_io_tree *tree,
+			 struct extent_state *state,
+			 unsigned long bits)
+{
+	if (tree->ops && tree->ops->set_bit_hook) {
+		tree->ops->set_bit_hook(tree->mapping->host, state->start,
+					state->end, state->state, bits);
+	}
+}
+
+static void clear_state_cb(struct extent_io_tree *tree,
+			   struct extent_state *state,
+			   unsigned long bits)
+{
+	if (tree->ops && tree->ops->set_bit_hook) {
+		tree->ops->clear_bit_hook(tree->mapping->host, state->start,
+					  state->end, state->state, bits);
+	}
+}
+
+/*
+ * insert an extent_state struct into the tree.  'bits' are set on the
+ * struct before it is inserted.
+ *
+ * This may return -EEXIST if the extent is already there, in which case the
+ * state struct is freed.
+ *
+ * The tree lock is not taken internally.  This is a utility function and
+ * probably isn't what you want to call (see set/clear_extent_bit).
+ */
+static int insert_state(struct extent_io_tree *tree,
+			struct extent_state *state, u64 start, u64 end,
+			int bits)
+{
+	struct rb_node *node;
+
+	if (end < start) {
+		printk("end < start %Lu %Lu\n", end, start);
+		WARN_ON(1);
+	}
+	if (bits & EXTENT_DIRTY)
+		tree->dirty_bytes += end - start + 1;
+	set_state_cb(tree, state, bits);
+	state->state |= bits;
+	state->start = start;
+	state->end = end;
+	node = tree_insert(&tree->state, end, &state->rb_node);
+	if (node) {
+		struct extent_state *found;
+		found = rb_entry(node, struct extent_state, rb_node);
+		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
+		free_extent_state(state);
+		return -EEXIST;
+	}
+	state->tree = tree;
+	merge_state(tree, state);
+	return 0;
+}
+
+/*
+ * split a given extent state struct in two, inserting the preallocated
+ * struct 'prealloc' as the newly created second half.  'split' indicates an
+ * offset inside 'orig' where it should be split.
+ *
+ * Before calling, the tree has 'orig' at [orig->start, orig->end].
+ * After calling, there are two extent state structs in the tree:
+ * prealloc: [orig->start, split - 1]
+ * orig: [ split, orig->end ]
+ *
+ * The tree locks are not taken by this function. They need to be held
+ * by the caller.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+		       struct extent_state *prealloc, u64 split)
+{
+	struct rb_node *node;
+	prealloc->start = orig->start;
+	prealloc->end = split - 1;
+	prealloc->state = orig->state;
+	orig->start = split;
+
+	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+	if (node) {
+		struct extent_state *found;
+		found = rb_entry(node, struct extent_state, rb_node);
+		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
+		free_extent_state(prealloc);
+		return -EEXIST;
+	}
+	prealloc->tree = tree;
+	return 0;
+}
+
+/*
+ * utility function to clear some bits in an extent state struct.
+ * it will optionally wake up anyone waiting on this state (wake == 1), or
+ * forcibly remove the state from the tree (delete == 1).
+ *
+ * If no bits are set on the state struct after clearing things, the
+ * struct is freed and removed from the tree
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+			    struct extent_state *state, int bits, int wake,
+			    int delete)
+{
+	int ret = state->state & bits;
+
+	if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+		u64 range = state->end - state->start + 1;
+		WARN_ON(range > tree->dirty_bytes);
+		tree->dirty_bytes -= range;
+	}
+	clear_state_cb(tree, state, bits);
+	state->state &= ~bits;
+	if (wake)
+		wake_up(&state->wq);
+	if (delete || state->state == 0) {
+		if (state->tree) {
+			clear_state_cb(tree, state, state->state);
+			rb_erase(&state->rb_node, &tree->state);
+			state->tree = NULL;
+			free_extent_state(state);
+		} else {
+			WARN_ON(1);
+		}
+	} else {
+		merge_state(tree, state);
+	}
+	return ret;
+}
+
+/*
+ * clear some bits on a range in the tree.  This may require splitting
+ * or inserting elements in the tree, so the gfp mask is used to
+ * indicate which allocations or sleeping are allowed.
+ *
+ * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
+ * the given range from the tree regardless of state (ie for truncate).
+ *
+ * the range [start, end] is inclusive.
+ *
+ * This takes the tree lock, and returns < 0 on error, > 0 if any of the
+ * bits were already set, or zero if none of the bits were already set.
+ */
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		     int bits, int wake, int delete, gfp_t mask)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct rb_node *node;
+	unsigned long flags;
+	int err;
+	int set = 0;
+
+again:
+	if (!prealloc && (mask & __GFP_WAIT)) {
+		prealloc = alloc_extent_state(mask);
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&tree->lock, flags);
+	/*
+	 * this search will find the extents that end after
+	 * our range starts
+	 */
+	node = tree_search(tree, start);
+	if (!node)
+		goto out;
+	state = rb_entry(node, struct extent_state, rb_node);
+	if (state->start > end)
+		goto out;
+	WARN_ON(state->end < start);
+
+	/*
+	 *     | ---- desired range ---- |
+	 *  | state | or
+	 *  | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip
+	 * bits on second half.
+	 *
+	 * If the extent we found extends past our range, we
+	 * just split and search again.  It'll get split again
+	 * the next time though.
+	 *
+	 * If the extent we found is inside our range, we clear
+	 * the desired bit on it.
+	 */
+
+	if (state->start < start) {
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			start = state->end + 1;
+			set |= clear_state_bit(tree, state, bits,
+					wake, delete);
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *                        | state |
+	 * We need to split the extent, and clear the bit
+	 * on the first half
+	 */
+	if (state->start <= end && state->end > end) {
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		err = split_state(tree, state, prealloc, end + 1);
+		BUG_ON(err == -EEXIST);
+
+		if (wake)
+			wake_up(&state->wq);
+		set |= clear_state_bit(tree, prealloc, bits,
+				       wake, delete);
+		prealloc = NULL;
+		goto out;
+	}
+
+	start = state->end + 1;
+	set |= clear_state_bit(tree, state, bits, wake, delete);
+	goto search_again;
+
+out:
+	spin_unlock_irqrestore(&tree->lock, flags);
+	if (prealloc)
+		free_extent_state(prealloc);
+
+	return set;
+
+search_again:
+	if (start > end)
+		goto out;
+	spin_unlock_irqrestore(&tree->lock, flags);
+	if (mask & __GFP_WAIT)
+		cond_resched();
+	goto again;
+}
+EXPORT_SYMBOL(clear_extent_bit);
+
+static int wait_on_state(struct extent_io_tree *tree,
+			 struct extent_state *state)
+{
+	DEFINE_WAIT(wait);
+	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
+	spin_unlock_irq(&tree->lock);
+	schedule();
+	spin_lock_irq(&tree->lock);
+	finish_wait(&state->wq, &wait);
+	return 0;
+}
+
+/*
+ * waits for one or more bits to clear on a range in the state tree.
+ * The range [start, end] is inclusive.
+ * The tree lock is taken by this function
+ */
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+	struct extent_state *state;
+	struct rb_node *node;
+
+	spin_lock_irq(&tree->lock);
+again:
+	while (1) {
+		/*
+		 * this search will find all the extents that end after
+		 * our range starts
+		 */
+		node = tree_search(tree, start);
+		if (!node)
+			break;
+
+		state = rb_entry(node, struct extent_state, rb_node);
+
+		if (state->start > end)
+			goto out;
+
+		if (state->state & bits) {
+			start = state->start;
+			atomic_inc(&state->refs);
+			wait_on_state(tree, state);
+			free_extent_state(state);
+			goto again;
+		}
+		start = state->end + 1;
+
+		if (start > end)
+			break;
+
+		if (need_resched()) {
+			spin_unlock_irq(&tree->lock);
+			cond_resched();
+			spin_lock_irq(&tree->lock);
+		}
+	}
+out:
+	spin_unlock_irq(&tree->lock);
+	return 0;
+}
+EXPORT_SYMBOL(wait_extent_bit);
+
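+/*
+ * helper to set bits on a single state struct: update the cached dirty
+ * byte count when EXTENT_DIRTY is newly set and call the set_bit hook.
+ */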
+static void set_state_bits(struct extent_io_tree *tree,
+			   struct extent_state *state,
+			   int bits)
+{
+	if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
+		u64 range = state->end - state->start + 1;
+		tree->dirty_bytes += range;
+	}
+	set_state_cb(tree, state, bits);
+	state->state |= bits;
+}
+
+/*
+ * set some bits on a range in the tree.  This may require allocations
+ * or sleeping, so the gfp mask is used to indicate what is allowed.
+ *
+ * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
+ * range already has the desired bits set.  The start of the existing
+ * range is returned in failed_start in this case.
+ *
+ * [start, end] is inclusive
+ * This takes the tree lock.
+ */
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
+		   int exclusive, u64 *failed_start, gfp_t mask)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct rb_node *node;
+	unsigned long flags;
+	int err = 0;
+	int set;
+	u64 last_start;
+	u64 last_end;
+again:
+	if (!prealloc && (mask & __GFP_WAIT)) {
+		prealloc = alloc_extent_state(mask);
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&tree->lock, flags);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, start);
+	if (!node) {
+		err = insert_state(tree, prealloc, start, end, bits);
+		prealloc = NULL;
+		BUG_ON(err == -EEXIST);
+		goto out;
+	}
+
+	state = rb_entry(node, struct extent_state, rb_node);
+	last_start = state->start;
+	last_end = state->end;
+
+	/*
+	 * | ---- desired range ---- |
+	 * | state |
+	 *
+	 * Just lock what we found and keep going
+	 */
+	if (state->start == start && state->end <= end) {
+		set = state->state & bits;
+		if (set && exclusive) {
+			*failed_start = state->start;
+			err = -EEXIST;
+			goto out;
+		}
+		set_state_bits(tree, state, bits);
+		start = state->end + 1;
+		merge_state(tree, state);
+		goto search_again;
+	}
+
+	/*
+	 *     | ---- desired range ---- |
+	 * | state |
+	 *   or
+	 * | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip bits on
+	 * second half.
+	 *
+	 * If the extent we found extends past our
+	 * range, we just split and search again.  It'll get split
+	 * again the next time though.
+	 *
+	 * If the extent we found is inside our range, we set the
+	 * desired bit on it.
+	 */
+	if (state->start < start) {
+		set = state->state & bits;
+		if (exclusive && set) {
+			*failed_start = start;
+			err = -EEXIST;
+			goto out;
+		}
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			set_state_bits(tree, state, bits);
+			start = state->end + 1;
+			merge_state(tree, state);
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *     | state | or               | state |
+	 *
+	 * There's a hole, we need to insert something in it and
+	 * ignore the extent we found.
+	 */
+	if (state->start > start) {
+		u64 this_end;
+		if (end < last_start)
+			this_end = end;
+		else
+			this_end = last_start - 1;
+		err = insert_state(tree, prealloc, start, this_end,
+				   bits);
+		prealloc = NULL;
+		BUG_ON(err == -EEXIST);
+		if (err)
+			goto out;
+		start = this_end + 1;
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *                        | state |
+	 * We need to split the extent, and set the bit
+	 * on the first half
+	 */
+	if (state->start <= end && state->end > end) {
+		set = state->state & bits;
+		if (exclusive && set) {
+			*failed_start = start;
+			err = -EEXIST;
+			goto out;
+		}
+		err = split_state(tree, state, prealloc, end + 1);
+		BUG_ON(err == -EEXIST);
+
+		set_state_bits(tree, prealloc, bits);
+		merge_state(tree, prealloc);
+		prealloc = NULL;
+		goto out;
+	}
+
+	goto search_again;
+
+out:
+	spin_unlock_irqrestore(&tree->lock, flags);
+	if (prealloc)
+		free_extent_state(prealloc);
+
+	return err;
+
+search_again:
+	if (start > end)
+		goto out;
+	spin_unlock_irqrestore(&tree->lock, flags);
+	if (mask & __GFP_WAIT)
+		cond_resched();
+	goto again;
+}
+EXPORT_SYMBOL(set_extent_bit);
+
+/* wrappers around set/clear extent bit */
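+/*
+ * The wrappers below just fix the bit masks and the wake/delete flags.
+ * For example (illustrative only), marking a range dirty and clearing it
+ * again comes down to:
+ *
+ *	set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, GFP_NOFS);
+ *	clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, GFP_NOFS);
+ */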
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
+			      mask);
+}
+EXPORT_SYMBOL(set_extent_dirty);
+
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_ordered);
+
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+		    int bits, gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, bits, 0, NULL,
+			      mask);
+}
+EXPORT_SYMBOL(set_extent_bits);
+
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+		      int bits, gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_bits);
+
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask)
+{
+	return set_extent_bit(tree, start, end,
+			      EXTENT_DELALLOC | EXTENT_DIRTY,
+			      0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_delalloc);
+
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end,
+				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_dirty);
+
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_ordered);
+
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
+			      mask);
+}
+EXPORT_SYMBOL(set_extent_new);
+
+int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_new);
+
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
+			      mask);
+}
+EXPORT_SYMBOL(set_extent_uptodate);
+
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			  gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_uptodate);
+
+int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
+			      0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_writeback);
+
+int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+			   gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_writeback);
+
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
+}
+EXPORT_SYMBOL(wait_on_extent_writeback);
+
+/*
+ * either insert or lock the state struct between start and end.  Use 'mask'
+ * to tell us whether waiting is desired.
+ */
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+	int err;
+	u64 failed_start;
+	while (1) {
+		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+				     &failed_start, mask);
+		if (err == -EEXIST && (mask & __GFP_WAIT)) {
+			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+			start = failed_start;
+		} else {
+			break;
+		}
+		WARN_ON(start > end);
+	}
+	return err;
+}
+EXPORT_SYMBOL(lock_extent);
+
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+		  gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+}
+EXPORT_SYMBOL(unlock_extent);
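+
+/*
+ * typical usage in this file (e.g. __extent_read_full_page) is to
+ * lock_extent() a byte range, do the IO or zeroing, and then
+ * unlock_extent() the same range once the pages are handled:
+ *
+ *	lock_extent(tree, start, end, GFP_NOFS);
+ *	... read or zero the pages covering [start, end] ...
+ *	unlock_extent(tree, start, end, GFP_NOFS);
+ */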
+
+/*
+ * helper function to set pages and extents in the tree dirty
+ */
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	struct page *page;
+
+	while (index <= end_index) {
+		page = find_get_page(tree->mapping, index);
+		BUG_ON(!page);
+		__set_page_dirty_nobuffers(page);
+		page_cache_release(page);
+		index++;
+	}
+	set_extent_dirty(tree, start, end, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(set_range_dirty);
+
+/*
+ * helper function to set both pages and extents in the tree writeback
+ */
+int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	struct page *page;
+
+	while (index <= end_index) {
+		page = find_get_page(tree->mapping, index);
+		BUG_ON(!page);
+		set_page_writeback(page);
+		page_cache_release(page);
+		index++;
+	}
+	set_extent_writeback(tree, start, end, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(set_range_writeback);
+
+/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			  u64 *start_ret, u64 *end_ret, int bits)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	int ret = 1;
+
+	spin_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, start);
+	if (!node) {
+		goto out;
+	}
+
+	while(1) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->end >= start && (state->state & bits)) {
+			*start_ret = state->start;
+			*end_ret = state->end;
+			ret = 0;
+			break;
+		}
+		node = rb_next(node);
+		if (!node)
+			break;
+	}
+out:
+	spin_unlock_irq(&tree->lock);
+	return ret;
+}
+EXPORT_SYMBOL(find_first_extent_bit);
+
+/* find the first state struct with 'bits' set after 'start', and
+ * return it.  tree->lock must be held.  NULL will be returned if
+ * nothing was found after 'start'
+ */
+struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
+						 u64 start, int bits)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, start);
+	if (!node) {
+		goto out;
+	}
+
+	while(1) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->end >= start && (state->state & bits)) {
+			return state;
+		}
+		node = rb_next(node);
+		if (!node)
+			break;
+	}
+out:
+	return NULL;
+}
+EXPORT_SYMBOL(find_first_extent_bit_state);
+
+/*
+ * find a contiguous range of bytes in the file marked as delalloc, not
+ * more than 'max_bytes'.  'start' and 'end' are used to return the range.
+ *
+ * The number of delalloc state records found is returned; 0 means nothing
+ * was in the tree.
+ */
+static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree,
+					     u64 *start, u64 *end, u64 max_bytes)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	u64 cur_start = *start;
+	u64 found = 0;
+	u64 total_bytes = 0;
+
+	spin_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+search_again:
+	node = tree_search(tree, cur_start);
+	if (!node) {
+		if (!found)
+			*end = (u64)-1;
+		goto out;
+	}
+
+	while(1) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (found && (state->start != cur_start ||
+			      (state->state & EXTENT_BOUNDARY))) {
+			goto out;
+		}
+		if (!(state->state & EXTENT_DELALLOC)) {
+			if (!found)
+				*end = state->end;
+			goto out;
+		}
+		if (!found && !(state->state & EXTENT_BOUNDARY)) {
+			struct extent_state *prev_state;
+			struct rb_node *prev_node = node;
+			while(1) {
+				prev_node = rb_prev(prev_node);
+				if (!prev_node)
+					break;
+				prev_state = rb_entry(prev_node,
+						      struct extent_state,
+						      rb_node);
+				if ((prev_state->end + 1 != state->start) ||
+				    !(prev_state->state & EXTENT_DELALLOC))
+					break;
+				if ((cur_start - prev_state->start) * 2 >
+				     max_bytes)
+					break;
+				state = prev_state;
+				node = prev_node;
+			}
+		}
+		if (state->state & EXTENT_LOCKED) {
+			DEFINE_WAIT(wait);
+			atomic_inc(&state->refs);
+			prepare_to_wait(&state->wq, &wait,
+					TASK_UNINTERRUPTIBLE);
+			spin_unlock_irq(&tree->lock);
+			schedule();
+			spin_lock_irq(&tree->lock);
+			finish_wait(&state->wq, &wait);
+			free_extent_state(state);
+			goto search_again;
+		}
+		set_state_cb(tree, state, EXTENT_LOCKED);
+		state->state |= EXTENT_LOCKED;
+		if (!found)
+			*start = state->start;
+		found++;
+		*end = state->end;
+		cur_start = state->end + 1;
+		node = rb_next(node);
+		if (!node)
+			break;
+		total_bytes += state->end - state->start + 1;
+		if (total_bytes >= max_bytes)
+			break;
+	}
+out:
+	spin_unlock_irq(&tree->lock);
+	return found;
+}
+
+/*
+ * count the number of bytes in the tree that have a given bit(s)
+ * set.  This can be fairly slow, except for EXTENT_DIRTY which is
+ * cached.  The total number of bytes found is returned.
+ */
+u64 count_range_bits(struct extent_io_tree *tree,
+		     u64 *start, u64 search_end, u64 max_bytes,
+		     unsigned long bits)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	u64 cur_start = *start;
+	u64 total_bytes = 0;
+	int found = 0;
+
+	if (search_end <= cur_start) {
+		printk("search_end %Lu start %Lu\n", search_end, cur_start);
+		WARN_ON(1);
+		return 0;
+	}
+
+	spin_lock_irq(&tree->lock);
+	if (cur_start == 0 && bits == EXTENT_DIRTY) {
+		total_bytes = tree->dirty_bytes;
+		goto out;
+	}
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, cur_start);
+	if (!node) {
+		goto out;
+	}
+
+	while(1) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->start > search_end)
+			break;
+		if (state->end >= cur_start && (state->state & bits)) {
+			total_bytes += min(search_end, state->end) + 1 -
+				       max(cur_start, state->start);
+			if (total_bytes >= max_bytes)
+				break;
+			if (!found) {
+				*start = state->start;
+				found = 1;
+			}
+		}
+		node = rb_next(node);
+		if (!node)
+			break;
+	}
+out:
+	spin_unlock_irq(&tree->lock);
+	return total_bytes;
+}
+
+/*
+ * helper function to lock both pages and extents in the tree.
+ * pages must be locked first.
+ */
+int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	struct page *page;
+	int err;
+
+	while (index <= end_index) {
+		page = grab_cache_page(tree->mapping, index);
+		if (!page) {
+			err = -ENOMEM;
+			goto failed;
+		}
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto failed;
+		}
+		index++;
+	}
+	lock_extent(tree, start, end, GFP_NOFS);
+	return 0;
+
+failed:
+	/*
+	 * we failed above in getting the page at 'index', so we undo here
+	 * up to but not including the page at 'index'
+	 */
+	end_index = index;
+	index = start >> PAGE_CACHE_SHIFT;
+	while (index < end_index) {
+		page = find_get_page(tree->mapping, index);
+		unlock_page(page);
+		page_cache_release(page);
+		index++;
+	}
+	return err;
+}
+EXPORT_SYMBOL(lock_range);
+
+/*
+ * helper function to unlock both pages and extents in the tree.
+ */
+int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	struct page *page;
+
+	while (index <= end_index) {
+		page = find_get_page(tree->mapping, index);
+		unlock_page(page);
+		page_cache_release(page);
+		index++;
+	}
+	unlock_extent(tree, start, end, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(unlock_range);
+
+/*
+ * set the private field for a given byte offset in the tree.  -ENOENT is
+ * returned if no extent_state starts at exactly that offset.
+ */
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	int ret = 0;
+
+	spin_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, start);
+	if (!node) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state = rb_entry(node, struct extent_state, rb_node);
+	if (state->start != start) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state->private = private;
+out:
+	spin_unlock_irq(&tree->lock);
+	return ret;
+}
+
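+/*
+ * read back the private field for the given byte offset.  -ENOENT is
+ * returned if no extent_state starts at exactly that offset.
+ */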
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+	struct rb_node *node;
+	struct extent_state *state;
+	int ret = 0;
+
+	spin_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(tree, start);
+	if (!node) {
+		ret = -ENOENT;
+		goto out;
+	}
+	state = rb_entry(node, struct extent_state, rb_node);
+	if (state->start != start) {
+		ret = -ENOENT;
+		goto out;
+	}
+	*private = state->private;
+out:
+	spin_unlock_irq(&tree->lock);
+	return ret;
+}
+
+/*
+ * searches a range in the state tree for a given mask.
+ * If 'filled' == 1, this returns 1 only if every extent in the range
+ * has the bits set.  Otherwise, 1 is returned if any bit in the
+ * range is found set.
+ */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int filled)
+{
+	struct extent_state *state = NULL;
+	struct rb_node *node;
+	int bitset = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tree->lock, flags);
+	node = tree_search(tree, start);
+	while (node && start <= end) {
+		state = rb_entry(node, struct extent_state, rb_node);
+
+		if (filled && state->start > start) {
+			bitset = 0;
+			break;
+		}
+
+		if (state->start > end)
+			break;
+
+		if (state->state & bits) {
+			bitset = 1;
+			if (!filled)
+				break;
+		} else if (filled) {
+			bitset = 0;
+			break;
+		}
+		start = state->end + 1;
+		if (start > end)
+			break;
+		node = rb_next(node);
+		if (!node) {
+			if (filled)
+				bitset = 0;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&tree->lock, flags);
+	return bitset;
+}
+EXPORT_SYMBOL(test_range_bit);
+
+/*
+ * helper function to set a given page up to date if all the
+ * extents in the tree for that page are up to date
+ */
+static int check_page_uptodate(struct extent_io_tree *tree,
+			       struct page *page)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+		SetPageUptodate(page);
+	return 0;
+}
+
+/*
+ * helper function to unlock a page if all the extents in the tree
+ * for that page are unlocked
+ */
+static int check_page_locked(struct extent_io_tree *tree,
+			     struct page *page)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+		unlock_page(page);
+	return 0;
+}
+
+/*
+ * helper function to end page writeback if all the extents
+ * in the tree for that page are done with writeback
+ */
+static int check_page_writeback(struct extent_io_tree *tree,
+			     struct page *page)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
+		end_page_writeback(page);
+	return 0;
+}
+
+/* lots and lots of room for performance fixes in the end_bio funcs */
+
+/*
+ * after a writepage IO is done, we need to:
+ * clear the uptodate bits on error
+ * clear the writeback bits in the extent tree for this IO
+ * end_page_writeback if the page has no more pending IO
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ */
+static void end_bio_extent_writepage(struct bio *bio, int err)
+{
+	int uptodate = err == 0;
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct extent_io_tree *tree;
+	u64 start;
+	u64 end;
+	int whole_page;
+	int ret;
+
+	do {
+		struct page *page = bvec->bv_page;
+		tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+			 bvec->bv_offset;
+		end = start + bvec->bv_len - 1;
+
+		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+			whole_page = 1;
+		else
+			whole_page = 0;
+
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+		if (tree->ops && tree->ops->writepage_end_io_hook) {
+			ret = tree->ops->writepage_end_io_hook(page, start,
+						       end, NULL, uptodate);
+			if (ret)
+				uptodate = 0;
+		}
+
+		if (!uptodate && tree->ops &&
+		    tree->ops->writepage_io_failed_hook) {
+			ret = tree->ops->writepage_io_failed_hook(bio, page,
+							 start, end, NULL);
+			if (ret == 0) {
+				uptodate = (err == 0);
+				continue;
+			}
+		}
+
+		if (!uptodate) {
+			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+			ClearPageUptodate(page);
+			SetPageError(page);
+		}
+
+		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
+
+		if (whole_page)
+			end_page_writeback(page);
+		else
+			check_page_writeback(tree, page);
+	} while (bvec >= bio->bi_io_vec);
+
+	bio_put(bio);
+}
+
+/*
+ * after a readpage IO is done, we need to:
+ * clear the uptodate bits on error
+ * set the uptodate bits if things worked
+ * set the page up to date if all extents in the tree are uptodate
+ * clear the lock bit in the extent tree
+ * unlock the page if there are no other extents locked for it
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ */
+static void end_bio_extent_readpage(struct bio *bio, int err)
+{
+	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct extent_io_tree *tree;
+	u64 start;
+	u64 end;
+	int whole_page;
+	int ret;
+
+	do {
+		struct page *page = bvec->bv_page;
+		tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+			bvec->bv_offset;
+		end = start + bvec->bv_len - 1;
+
+		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+			whole_page = 1;
+		else
+			whole_page = 0;
+
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
+		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
+			ret = tree->ops->readpage_end_io_hook(page, start, end,
+							      NULL);
+			if (ret)
+				uptodate = 0;
+		}
+		if (!uptodate && tree->ops &&
+		    tree->ops->readpage_io_failed_hook) {
+			ret = tree->ops->readpage_io_failed_hook(bio, page,
+							 start, end, NULL);
+			if (ret == 0) {
+				uptodate =
+					test_bit(BIO_UPTODATE, &bio->bi_flags);
+				continue;
+			}
+		}
+
+		if (uptodate)
+			set_extent_uptodate(tree, start, end,
+					    GFP_ATOMIC);
+		unlock_extent(tree, start, end, GFP_ATOMIC);
+
+		if (whole_page) {
+			if (uptodate) {
+				SetPageUptodate(page);
+			} else {
+				ClearPageUptodate(page);
+				SetPageError(page);
+			}
+			unlock_page(page);
+		} else {
+			if (uptodate) {
+				check_page_uptodate(tree, page);
+			} else {
+				ClearPageUptodate(page);
+				SetPageError(page);
+			}
+			check_page_locked(tree, page);
+		}
+	} while (bvec >= bio->bi_io_vec);
+
+	bio_put(bio);
+}
+
+/*
+ * IO done from prepare_write is pretty simple, we just unlock
+ * the structs in the extent tree when done, and set the uptodate bits
+ * as appropriate.
+ */
+static void end_bio_extent_preparewrite(struct bio *bio, int err)
+{
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct extent_io_tree *tree;
+	u64 start;
+	u64 end;
+
+	do {
+		struct page *page = bvec->bv_page;
+		tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+			bvec->bv_offset;
+		end = start + bvec->bv_len - 1;
+
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
+		if (uptodate) {
+			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+		} else {
+			ClearPageUptodate(page);
+			SetPageError(page);
+		}
+
+		unlock_extent(tree, start, end, GFP_ATOMIC);
+
+	} while (bvec >= bio->bi_io_vec);
+
+	bio_put(bio);
+}
+
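+/*
+ * allocate a bio for nr_vecs pages.  If the allocation fails while we're
+ * in memory reclaim (PF_MEMALLOC), retry with progressively fewer vecs
+ * before giving up.
+ */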
+static struct bio *
+extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		 gfp_t gfp_flags)
+{
+	struct bio *bio;
+
+	bio = bio_alloc(gfp_flags, nr_vecs);
+
+	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+		while (!bio && (nr_vecs /= 2))
+			bio = bio_alloc(gfp_flags, nr_vecs);
+	}
+
+	if (bio) {
+		bio->bi_size = 0;
+		bio->bi_bdev = bdev;
+		bio->bi_sector = first_sector;
+	}
+	return bio;
+}
+
+static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
+{
+	int ret = 0;
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct page *page = bvec->bv_page;
+	struct extent_io_tree *tree = bio->bi_private;
+	struct rb_node *node;
+	struct extent_state *state;
+	u64 start;
+	u64 end;
+
+	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+	end = start + bvec->bv_len - 1;
+
+	spin_lock_irq(&tree->lock);
+	node = __etree_search(tree, start, NULL, NULL);
+	BUG_ON(!node);
+	state = rb_entry(node, struct extent_state, rb_node);
+	while(state->end < end) {
+		node = rb_next(node);
+		state = rb_entry(node, struct extent_state, rb_node);
+	}
+	BUG_ON(state->end != end);
+	spin_unlock_irq(&tree->lock);
+
+	bio->bi_private = NULL;
+
+	bio_get(bio);
+
+	if (tree->ops && tree->ops->submit_bio_hook)
+		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+					   mirror_num);
+	else
+		submit_bio(rw, bio);
+	if (bio_flagged(bio, BIO_EOPNOTSUPP))
+		ret = -EOPNOTSUPP;
+	bio_put(bio);
+	return ret;
+}
+
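+/*
+ * add a page to the bio being built up in *bio_ret.  If the page isn't
+ * contiguous with the existing bio, the merge hook rejects it, or
+ * bio_add_page() can't take it, the old bio is submitted and a new one
+ * is started.  With bio_ret == NULL the bio is submitted right away.
+ */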
+static int submit_extent_page(int rw, struct extent_io_tree *tree,
+			      struct page *page, sector_t sector,
+			      size_t size, unsigned long offset,
+			      struct block_device *bdev,
+			      struct bio **bio_ret,
+			      unsigned long max_pages,
+			      bio_end_io_t end_io_func,
+			      int mirror_num)
+{
+	int ret = 0;
+	struct bio *bio;
+	int nr;
+
+	if (bio_ret && *bio_ret) {
+		bio = *bio_ret;
+		if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+		    (tree->ops && tree->ops->merge_bio_hook &&
+		     tree->ops->merge_bio_hook(page, offset, size, bio)) ||
+		    bio_add_page(bio, page, size, offset) < size) {
+			ret = submit_one_bio(rw, bio, mirror_num);
+			bio = NULL;
+		} else {
+			return 0;
+		}
+	}
+	nr = bio_get_nr_vecs(bdev);
+	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	if (!bio) {
+		printk("failed to allocate bio nr %d\n", nr);
+		return -ENOMEM;
+	}
+
+	bio_add_page(bio, page, size, offset);
+	bio->bi_end_io = end_io_func;
+	bio->bi_private = tree;
+
+	if (bio_ret) {
+		*bio_ret = bio;
+	} else {
+		ret = submit_one_bio(rw, bio, mirror_num);
+	}
+
+	return ret;
+}
+
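+/*
+ * tag a page as being managed by the extent io code by stashing a magic
+ * value in page->private (taking a page reference the first time).
+ */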
+void set_page_extent_mapped(struct page *page)
+{
+	if (!PagePrivate(page)) {
+		SetPagePrivate(page);
+		page_cache_get(page);
+		set_page_private(page, EXTENT_PAGE_PRIVATE);
+	}
+}
+
+void set_page_extent_head(struct page *page, unsigned long len)
+{
+	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
+}
+
+/*
+ * basic readpage implementation.  Locked extent state structs are inserted
+ * into the tree and are removed when the IO is done (by the end_io
+ * handlers)
+ */
+static int __extent_read_full_page(struct extent_io_tree *tree,
+				   struct page *page,
+				   get_extent_t *get_extent,
+				   struct bio **bio, int mirror_num)
+{
+	struct inode *inode = page->mapping->host;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 end;
+	u64 cur = start;
+	u64 extent_offset;
+	u64 last_byte = i_size_read(inode);
+	u64 block_start;
+	u64 cur_end;
+	sector_t sector;
+	struct extent_map *em;
+	struct block_device *bdev;
+	int ret;
+	int nr = 0;
+	size_t page_offset = 0;
+	size_t iosize;
+	size_t blocksize = inode->i_sb->s_blocksize;
+
+	set_page_extent_mapped(page);
+
+	end = page_end;
+	lock_extent(tree, start, end, GFP_NOFS);
+
+	while (cur <= end) {
+		if (cur >= last_byte) {
+			char *userpage;
+			iosize = PAGE_CACHE_SIZE - page_offset;
+			userpage = kmap_atomic(page, KM_USER0);
+			memset(userpage + page_offset, 0, iosize);
+			flush_dcache_page(page);
+			kunmap_atomic(userpage, KM_USER0);
+			set_extent_uptodate(tree, cur, cur + iosize - 1,
+					    GFP_NOFS);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			break;
+		}
+		em = get_extent(inode, page, page_offset, cur,
+				end - cur + 1, 0);
+		if (IS_ERR(em) || !em) {
+			SetPageError(page);
+			unlock_extent(tree, cur, end, GFP_NOFS);
+			break;
+		}
+		extent_offset = cur - em->start;
+		if (extent_map_end(em) <= cur) {
+			printk("bad mapping em [%Lu %Lu] cur %Lu\n",
+			       em->start, extent_map_end(em), cur);
+		}
+		BUG_ON(extent_map_end(em) <= cur);
+		if (end < cur) {
+			printk("2bad mapping end %Lu cur %Lu\n", end, cur);
+		}
+		BUG_ON(end < cur);
+
+		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		cur_end = min(extent_map_end(em) - 1, end);
+		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+		sector = (em->block_start + extent_offset) >> 9;
+		bdev = em->bdev;
+		block_start = em->block_start;
+		free_extent_map(em);
+		em = NULL;
+
+		/* we've found a hole, just zero and go on */
+		if (block_start == EXTENT_MAP_HOLE) {
+			char *userpage;
+			userpage = kmap_atomic(page, KM_USER0);
+			memset(userpage + page_offset, 0, iosize);
+			flush_dcache_page(page);
+			kunmap_atomic(userpage, KM_USER0);
+
+			set_extent_uptodate(tree, cur, cur + iosize - 1,
+					    GFP_NOFS);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+		/* the get_extent function already copied into the page */
+		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+			check_page_uptodate(tree, page);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+		/* we have an inline extent but it didn't get marked up
+		 * to date.  Error out
+		 */
+		if (block_start == EXTENT_MAP_INLINE) {
+			SetPageError(page);
+			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+			cur = cur + iosize;
+			page_offset += iosize;
+			continue;
+		}
+
+		ret = 0;
+		if (tree->ops && tree->ops->readpage_io_hook) {
+			ret = tree->ops->readpage_io_hook(page, cur,
+							  cur + iosize - 1);
+		}
+		if (!ret) {
+			unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+			pnr -= page->index;
+			ret = submit_extent_page(READ, tree, page,
+					 sector, iosize, page_offset,
+					 bdev, bio, pnr,
+					 end_bio_extent_readpage, mirror_num);
+			nr++;
+		}
+		if (ret)
+			SetPageError(page);
+		cur = cur + iosize;
+		page_offset += iosize;
+	}
+	if (!nr) {
+		if (!PageError(page))
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+	return 0;
+}
+
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+			    get_extent_t *get_extent)
+{
+	struct bio *bio = NULL;
+	int ret;
+
+	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
+	if (bio)
+		submit_one_bio(READ, bio, 0);
+	return ret;
+}
+EXPORT_SYMBOL(extent_read_full_page);
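+
+/*
+ * a filesystem ->readpage implementation can be a thin wrapper around
+ * extent_read_full_page().  A minimal sketch (foo_readpage and
+ * foo_get_extent are the caller's own names, not defined here):
+ *
+ *	static int foo_readpage(struct file *file, struct page *page)
+ *	{
+ *		struct extent_io_tree *tree;
+ *		tree = &BTRFS_I(page->mapping->host)->io_tree;
+ *		return extent_read_full_page(tree, page, foo_get_extent);
+ *	}
+ */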
+
+/*
+ * the writepage semantics are similar to regular writepage.  extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback.  Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+			      void *data)
+{
+	struct inode *inode = page->mapping->host;
+	struct extent_page_data *epd = data;
+	struct extent_io_tree *tree = epd->tree;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 delalloc_start;
+	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 end;
+	u64 cur = start;
+	u64 extent_offset;
+	u64 last_byte = i_size_read(inode);
+	u64 block_start;
+	u64 iosize;
+	u64 unlock_start;
+	sector_t sector;
+	struct extent_map *em;
+	struct block_device *bdev;
+	int ret;
+	int nr = 0;
+	size_t pg_offset = 0;
+	size_t blocksize;
+	loff_t i_size = i_size_read(inode);
+	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	u64 nr_delalloc;
+	u64 delalloc_end;
+
+	WARN_ON(!PageLocked(page));
+	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	if (page->index > end_index ||
+	   (page->index == end_index && !pg_offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0);
+		unlock_page(page);
+		return 0;
+	}
+
+	if (page->index == end_index) {
+		char *userpage;
+
+		userpage = kmap_atomic(page, KM_USER0);
+		memset(userpage + pg_offset, 0,
+		       PAGE_CACHE_SIZE - pg_offset);
+		kunmap_atomic(userpage, KM_USER0);
+		flush_dcache_page(page);
+	}
+	pg_offset = 0;
+
+	set_page_extent_mapped(page);
+
+	delalloc_start = start;
+	delalloc_end = 0;
+	while(delalloc_end < page_end) {
+		nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+						       &delalloc_end,
+						       128 * 1024 * 1024);
+		if (nr_delalloc == 0) {
+			delalloc_start = delalloc_end + 1;
+			continue;
+		}
+		tree->ops->fill_delalloc(inode, delalloc_start,
+					 delalloc_end);
+		clear_extent_bit(tree, delalloc_start,
+				 delalloc_end,
+				 EXTENT_LOCKED | EXTENT_DELALLOC,
+				 1, 0, GFP_NOFS);
+		delalloc_start = delalloc_end + 1;
+	}
+	lock_extent(tree, start, page_end, GFP_NOFS);
+	unlock_start = start;
+
+	if (tree->ops && tree->ops->writepage_start_hook) {
+		ret = tree->ops->writepage_start_hook(page, start, page_end);
+		if (ret == -EAGAIN) {
+			unlock_extent(tree, start, page_end, GFP_NOFS);
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return 0;
+		}
+	}
+
+	end = page_end;
+	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
+		printk("found delalloc bits after lock_extent\n");
+	}
+
+	if (last_byte <= start) {
+		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+		unlock_extent(tree, start, page_end, GFP_NOFS);
+		if (tree->ops && tree->ops->writepage_end_io_hook)
+			tree->ops->writepage_end_io_hook(page, start,
+							 page_end, NULL, 1);
+		unlock_start = page_end + 1;
+		goto done;
+	}
+
+	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
+	blocksize = inode->i_sb->s_blocksize;
+
+	while (cur <= end) {
+		if (cur >= last_byte) {
+			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+			unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, cur,
+							 page_end, NULL, 1);
+			unlock_start = page_end + 1;
+			break;
+		}
+		em = epd->get_extent(inode, page, pg_offset, cur,
+				     end - cur + 1, 1);
+		if (IS_ERR(em) || !em) {
+			SetPageError(page);
+			break;
+		}
+
+		extent_offset = cur - em->start;
+		BUG_ON(extent_map_end(em) <= cur);
+		BUG_ON(end < cur);
+		iosize = min(extent_map_end(em) - cur, end - cur + 1);
+		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+		sector = (em->block_start + extent_offset) >> 9;
+		bdev = em->bdev;
+		block_start = em->block_start;
+		free_extent_map(em);
+		em = NULL;
+
+		if (block_start == EXTENT_MAP_HOLE ||
+		    block_start == EXTENT_MAP_INLINE) {
+			clear_extent_dirty(tree, cur,
+					   cur + iosize - 1, GFP_NOFS);
+
+			unlock_extent(tree, unlock_start, cur + iosize -1,
+				      GFP_NOFS);
+
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, cur,
+							 cur + iosize - 1,
+							 NULL, 1);
+			cur = cur + iosize;
+			pg_offset += iosize;
+			unlock_start = cur;
+			continue;
+		}
+
+		/* leave this out until we have a page_mkwrite call */
+		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
+				   EXTENT_DIRTY, 0)) {
+			cur = cur + iosize;
+			pg_offset += iosize;
+			continue;
+		}
+		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
+		if (tree->ops && tree->ops->writepage_io_hook) {
+			ret = tree->ops->writepage_io_hook(page, cur,
+						cur + iosize - 1);
+		} else {
+			ret = 0;
+		}
+		if (ret) {
+			SetPageError(page);
+		} else {
+			unsigned long max_nr = end_index + 1;
+
+			set_range_writeback(tree, cur, cur + iosize - 1);
+			if (!PageWriteback(page)) {
+				printk("warning page %lu not writeback, "
+				       "cur %llu end %llu\n", page->index,
+				       (unsigned long long)cur,
+				       (unsigned long long)end);
+			}
+
+			ret = submit_extent_page(WRITE, tree, page, sector,
+						 iosize, pg_offset, bdev,
+						 &epd->bio, max_nr,
+						 end_bio_extent_writepage, 0);
+			if (ret)
+				SetPageError(page);
+		}
+		cur = cur + iosize;
+		pg_offset += iosize;
+		nr++;
+	}
+done:
+	if (nr == 0) {
+		/* make sure the mapping tag for page dirty gets cleared */
+		set_page_writeback(page);
+		end_page_writeback(page);
+	}
+	if (unlock_start <= page_end)
+		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+	unlock_page(page);
+	return 0;
+}
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even
+ * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them.  If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ */
+int extent_write_cache_pages(struct extent_io_tree *tree,
+			     struct address_space *mapping,
+			     struct writeback_control *wbc,
+			     writepage_t writepage, void *data)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	int ret = 0;
+	int done = 0;
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	int scanned = 0;
+	int range_whole = 0;
+
+	if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		wbc->encountered_congestion = 1;
+		return 0;
+	}
+
+	pagevec_init(&pvec, 0);
+	if (wbc->range_cyclic) {
+		index = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		scanned = 1;
+	}
+retry:
+	while (!done && (index <= end) &&
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+					      PAGECACHE_TAG_DIRTY,
+					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+		unsigned i;
+
+		scanned = 1;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point we hold neither mapping->tree_lock nor
+			 * lock on the page itself: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or even
+			 * swizzled back from swapper_space to tmpfs file
+			 * mapping
+			 */
+			if (tree->ops && tree->ops->write_cache_pages_lock_hook)
+				tree->ops->write_cache_pages_lock_hook(page);
+			else
+				lock_page(page);
+
+			if (unlikely(page->mapping != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!wbc->range_cyclic && page->index > end) {
+				done = 1;
+				unlock_page(page);
+				continue;
+			}
+
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+
+			if (PageWriteback(page) ||
+			    !clear_page_dirty_for_io(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			ret = (*writepage)(page, wbc, data);
+
+			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+				unlock_page(page);
+				ret = 0;
+			}
+			if (ret || (--(wbc->nr_to_write) <= 0))
+				done = 1;
+			if (wbc->nonblocking && bdi_write_congested(bdi)) {
+				wbc->encountered_congestion = 1;
+				done = 1;
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+
+	if (wbc->range_cont)
+		wbc->range_start = (loff_t)index << PAGE_CACHE_SHIFT;
+	return ret;
+}
+EXPORT_SYMBOL(extent_write_cache_pages);
+
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+			  get_extent_t *get_extent,
+			  struct writeback_control *wbc)
+{
+	int ret;
+	struct address_space *mapping = page->mapping;
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+	};
+	struct writeback_control wbc_writepages = {
+		.bdi		= wbc->bdi,
+		.sync_mode	= WB_SYNC_NONE,
+		.older_than_this = NULL,
+		.nr_to_write	= 64,
+		.range_start	= page_offset(page) + PAGE_CACHE_SIZE,
+		.range_end	= (loff_t)-1,
+	};
+
+
+	ret = __extent_writepage(page, wbc, &epd);
+
+	extent_write_cache_pages(tree, mapping, &wbc_writepages,
+				 __extent_writepage, &epd);
+	if (epd.bio) {
+		submit_one_bio(WRITE, epd.bio, 0);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(extent_write_full_page);
+
+
+int extent_writepages(struct extent_io_tree *tree,
+		      struct address_space *mapping,
+		      get_extent_t *get_extent,
+		      struct writeback_control *wbc)
+{
+	int ret = 0;
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+	};
+
+	ret = extent_write_cache_pages(tree, mapping, wbc,
+				       __extent_writepage, &epd);
+	if (epd.bio) {
+		submit_one_bio(WRITE, epd.bio, 0);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(extent_writepages);
+
+int extent_readpages(struct extent_io_tree *tree,
+		     struct address_space *mapping,
+		     struct list_head *pages, unsigned nr_pages,
+		     get_extent_t get_extent)
+{
+	struct bio *bio = NULL;
+	unsigned page_idx;
+	struct pagevec pvec;
+
+	pagevec_init(&pvec, 0);
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page = list_entry(pages->prev, struct page, lru);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+		/*
+		 * what we want to do here is call add_to_page_cache_lru,
+		 * but that isn't exported, so we reproduce it here
+		 */
+		if (!add_to_page_cache(page, mapping,
+					page->index, GFP_KERNEL)) {
+
+			/* open coding of lru_cache_add, also not exported */
+			page_cache_get(page);
+			if (!pagevec_add(&pvec, page))
+				__pagevec_lru_add(&pvec);
+			__extent_read_full_page(tree, page, get_extent,
+						&bio, 0);
+		}
+		page_cache_release(page);
+	}
+	if (pagevec_count(&pvec))
+		__pagevec_lru_add(&pvec);
+	BUG_ON(!list_empty(pages));
+	if (bio)
+		submit_one_bio(READ, bio, 0);
+	return 0;
+}
+EXPORT_SYMBOL(extent_readpages);
+
+/*
+ * basic invalidatepage code, this waits on any locked or writeback
+ * ranges corresponding to the page, and then deletes any extent state
+ * records from the tree
+ */
+int extent_invalidatepage(struct extent_io_tree *tree,
+			  struct page *page, unsigned long offset)
+{
+	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
+
+	start += (offset + blocksize - 1) & ~(blocksize - 1);
+	if (start > end)
+		return 0;
+
+	lock_extent(tree, start, end, GFP_NOFS);
+	wait_on_extent_writeback(tree, start, end);
+	clear_extent_bit(tree, start, end,
+			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+			 1, 1, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(extent_invalidatepage);
+
+/*
+ * simple commit_write call, set_range_dirty is used to mark both
+ * the pages and the extent records as dirty
+ */
+int extent_commit_write(struct extent_io_tree *tree,
+			struct inode *inode, struct page *page,
+			unsigned from, unsigned to)
+{
+	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	set_page_extent_mapped(page);
+	set_page_dirty(page);
+
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		mark_inode_dirty(inode);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(extent_commit_write);
+
+int extent_prepare_write(struct extent_io_tree *tree,
+			 struct inode *inode, struct page *page,
+			 unsigned from, unsigned to, get_extent_t *get_extent)
+{
+	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+	u64 block_start;
+	u64 orig_block_start;
+	u64 block_end;
+	u64 cur_end;
+	struct extent_map *em;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	size_t page_offset = 0;
+	size_t block_off_start;
+	size_t block_off_end;
+	int err = 0;
+	int iocount = 0;
+	int ret = 0;
+	int isnew;
+
+	set_page_extent_mapped(page);
+
+	block_start = (page_start + from) & ~((u64)blocksize - 1);
+	block_end = (page_start + to - 1) | (blocksize - 1);
+	orig_block_start = block_start;
+
+	lock_extent(tree, page_start, page_end, GFP_NOFS);
+	while(block_start <= block_end) {
+		em = get_extent(inode, page, page_offset, block_start,
+				block_end - block_start + 1, 1);
+		if (IS_ERR(em) || !em) {
+			goto err;
+		}
+		cur_end = min(block_end, extent_map_end(em) - 1);
+		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
+		block_off_end = block_off_start + blocksize;
+		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
+
+		if (!PageUptodate(page) && isnew &&
+		    (block_off_end > to || block_off_start < from)) {
+			void *kaddr;
+
+			kaddr = kmap_atomic(page, KM_USER0);
+			if (block_off_end > to)
+				memset(kaddr + to, 0, block_off_end - to);
+			if (block_off_start < from)
+				memset(kaddr + block_off_start, 0,
+				       from - block_off_start);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+		}
+		if ((em->block_start != EXTENT_MAP_HOLE &&
+		     em->block_start != EXTENT_MAP_INLINE) &&
+		    !isnew && !PageUptodate(page) &&
+		    (block_off_end > to || block_off_start < from) &&
+		    !test_range_bit(tree, block_start, cur_end,
+				    EXTENT_UPTODATE, 1)) {
+			u64 sector;
+			u64 extent_offset = block_start - em->start;
+			size_t iosize;
+			sector = (em->block_start + extent_offset) >> 9;
+			iosize = (cur_end - block_start + blocksize) &
+				~((u64)blocksize - 1);
+			/*
+			 * we've already got the extent locked, but we
+			 * need to split the state such that our end_bio
+			 * handler can clear the lock.
+			 */
+			set_extent_bit(tree, block_start,
+				       block_start + iosize - 1,
+				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+			ret = submit_extent_page(READ, tree, page,
+					 sector, iosize, page_offset, em->bdev,
+					 NULL, 1,
+					 end_bio_extent_preparewrite, 0);
+			iocount++;
+			block_start = block_start + iosize;
+		} else {
+			set_extent_uptodate(tree, block_start, cur_end,
+					    GFP_NOFS);
+			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+			block_start = cur_end + 1;
+		}
+		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
+		free_extent_map(em);
+	}
+	if (iocount) {
+		wait_extent_bit(tree, orig_block_start,
+				block_end, EXTENT_LOCKED);
+	}
+	check_page_uptodate(tree, page);
+err:
+	/* FIXME, zero out newly allocated blocks on error */
+	return err;
+}
+EXPORT_SYMBOL(extent_prepare_write);
+
+/*
+ * a helper for releasepage, this tests for areas of the page that
+ * are locked or under IO and drops the related state bits if it is safe
+ * to drop the page.
+ */
+int try_release_extent_state(struct extent_map_tree *map,
+			     struct extent_io_tree *tree, struct page *page,
+			     gfp_t mask)
+{
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	int ret = 1;
+
+	if (test_range_bit(tree, start, end,
+			   EXTENT_IOBITS | EXTENT_ORDERED, 0))
+		ret = 0;
+	else {
+		if ((mask & GFP_NOFS) == GFP_NOFS)
+			mask = GFP_NOFS;
+		clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
+				 1, 1, mask);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(try_release_extent_state);
+
+/*
+ * a helper for releasepage.  As long as there are no locked extents
+ * in the range corresponding to the page, both state records and extent
+ * map records are removed
+ */
+int try_release_extent_mapping(struct extent_map_tree *map,
+			       struct extent_io_tree *tree, struct page *page,
+			       gfp_t mask)
+{
+	struct extent_map *em;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+
+	if ((mask & __GFP_WAIT) &&
+	    page->mapping->host->i_size > 16 * 1024 * 1024) {
+		u64 len;
+		while (start <= end) {
+			len = end - start + 1;
+			spin_lock(&map->lock);
+			em = lookup_extent_mapping(map, start, len);
+			if (!em || IS_ERR(em)) {
+				spin_unlock(&map->lock);
+				break;
+			}
+			if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
+			    em->start != start) {
+				spin_unlock(&map->lock);
+				free_extent_map(em);
+				break;
+			}
+			if (!test_range_bit(tree, em->start,
+					    extent_map_end(em) - 1,
+					    EXTENT_LOCKED, 0)) {
+				remove_extent_mapping(map, em);
+				/* once for the rb tree */
+				free_extent_map(em);
+			}
+			start = extent_map_end(em);
+			spin_unlock(&map->lock);
+
+			/* once for us */
+			free_extent_map(em);
+		}
+	}
+	return try_release_extent_state(map, tree, page, mask);
+}
+EXPORT_SYMBOL(try_release_extent_mapping);
+
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+		get_extent_t *get_extent)
+{
+	struct inode *inode = mapping->host;
+	u64 start = iblock << inode->i_blkbits;
+	sector_t sector = 0;
+	struct extent_map *em;
+
+	em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+	if (!em || IS_ERR(em))
+		return 0;
+
+	if (em->block_start == EXTENT_MAP_INLINE ||
+	    em->block_start == EXTENT_MAP_HOLE)
+		goto out;
+
+	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
+out:
+	free_extent_map(em);
+	return sector;
+}
+
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+					      unsigned long i)
+{
+	struct page *p;
+	struct address_space *mapping;
+
+	if (i == 0)
+		return eb->first_page;
+	i += eb->start >> PAGE_CACHE_SHIFT;
+	mapping = eb->first_page->mapping;
+	if (!mapping)
+		return NULL;
+
+	/*
+	 * extent_buffer_page is only called after pinning the page
+	 * by increasing the reference count.  So we know the page must
+	 * be in the radix tree.
+	 */
+	rcu_read_lock();
+	p = radix_tree_lookup(&mapping->page_tree, i);
+	rcu_read_unlock();
+
+	return p;
+}
+
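+/*
+ * number of pages covered by the extent buffer starting at 'start' with
+ * length 'len' (buffers don't have to be page aligned).
+ */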
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+		(start >> PAGE_CACHE_SHIFT);
+}
+
+static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
+						   u64 start,
+						   unsigned long len,
+						   gfp_t mask)
+{
+	struct extent_buffer *eb = NULL;
+#ifdef LEAK_DEBUG
+	unsigned long flags;
+#endif
+
+	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	if (!eb)
+		return NULL;
+	eb->start = start;
+	eb->len = len;
+	mutex_init(&eb->mutex);
+#ifdef LEAK_DEBUG
+	spin_lock_irqsave(&leak_lock, flags);
+	list_add(&eb->leak_list, &buffers);
+	spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+	atomic_set(&eb->refs, 1);
+
+	return eb;
+}
+
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+#ifdef LEAK_DEBUG
+	unsigned long flags;
+	spin_lock_irqsave(&leak_lock, flags);
+	list_del(&eb->leak_list);
+	spin_unlock_irqrestore(&leak_lock, flags);
+#endif
+	kmem_cache_free(extent_buffer_cache, eb);
+}
+
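+/*
+ * return the extent buffer covering [start, start + len).  If one is
+ * already cached in the tree its refcount is bumped; otherwise the
+ * backing pages are found or created and a new buffer is inserted.
+ */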
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+					  u64 start, unsigned long len,
+					  struct page *page0,
+					  gfp_t mask)
+{
+	unsigned long num_pages = num_extent_pages(start, len);
+	unsigned long i;
+	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	struct extent_buffer *eb;
+	struct extent_buffer *exists = NULL;
+	struct page *p;
+	struct address_space *mapping = tree->mapping;
+	int uptodate = 1;
+
+	spin_lock(&tree->buffer_lock);
+	eb = buffer_search(tree, start);
+	if (eb) {
+		atomic_inc(&eb->refs);
+		spin_unlock(&tree->buffer_lock);
+		mark_page_accessed(eb->first_page);
+		return eb;
+	}
+	spin_unlock(&tree->buffer_lock);
+
+	eb = __alloc_extent_buffer(tree, start, len, mask);
+	if (!eb)
+		return NULL;
+
+	if (page0) {
+		eb->first_page = page0;
+		i = 1;
+		index++;
+		page_cache_get(page0);
+		mark_page_accessed(page0);
+		set_page_extent_mapped(page0);
+		set_page_extent_head(page0, len);
+		uptodate = PageUptodate(page0);
+	} else {
+		i = 0;
+	}
+	for (; i < num_pages; i++, index++) {
+		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+		if (!p) {
+			WARN_ON(1);
+			goto free_eb;
+		}
+		set_page_extent_mapped(p);
+		mark_page_accessed(p);
+		if (i == 0) {
+			eb->first_page = p;
+			set_page_extent_head(p, len);
+		} else {
+			set_page_private(p, EXTENT_PAGE_PRIVATE);
+		}
+		if (!PageUptodate(p))
+			uptodate = 0;
+		unlock_page(p);
+	}
+	if (uptodate)
+		eb->flags |= EXTENT_UPTODATE;
+	eb->flags |= EXTENT_BUFFER_FILLED;
+
+	spin_lock(&tree->buffer_lock);
+	exists = buffer_tree_insert(tree, start, &eb->rb_node);
+	if (exists) {
+		/* add one reference for the caller */
+		atomic_inc(&exists->refs);
+		spin_unlock(&tree->buffer_lock);
+		goto free_eb;
+	}
+	spin_unlock(&tree->buffer_lock);
+
+	/* add one reference for the tree */
+	atomic_inc(&eb->refs);
+	return eb;
+
+free_eb:
+	if (!atomic_dec_and_test(&eb->refs))
+		return exists;
+	for (index = 1; index < i; index++)
+		page_cache_release(extent_buffer_page(eb, index));
+	if (i > 0)
+		page_cache_release(extent_buffer_page(eb, 0));
+	__free_extent_buffer(eb);
+	return exists;
+}
+EXPORT_SYMBOL(alloc_extent_buffer);
+
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+					 u64 start, unsigned long len,
+					  gfp_t mask)
+{
+	struct extent_buffer *eb;
+
+	spin_lock(&tree->buffer_lock);
+	eb = buffer_search(tree, start);
+	if (eb)
+		atomic_inc(&eb->refs);
+	spin_unlock(&tree->buffer_lock);
+
+	if (eb)
+		mark_page_accessed(eb->first_page);
+
+	return eb;
+}
+EXPORT_SYMBOL(find_extent_buffer);
+
+void free_extent_buffer(struct extent_buffer *eb)
+{
+	if (!eb)
+		return;
+
+	if (!atomic_dec_and_test(&eb->refs))
+		return;
+
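+	/* the tree holds the final reference and frees the buffer in
+	 * try_release_extent_buffer(), so the count should never reach
+	 * zero here
+	 */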
+	WARN_ON(1);
+}
+EXPORT_SYMBOL(free_extent_buffer);
+
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+			      struct extent_buffer *eb)
+{
+	int set;
+	unsigned long i;
+	unsigned long num_pages;
+	struct page *page;
+
+	u64 start = eb->start;
+	u64 end = start + eb->len - 1;
+
+	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
+	num_pages = num_extent_pages(eb->start, eb->len);
+
+	for (i = 0; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		lock_page(page);
+		if (i == 0)
+			set_page_extent_head(page, eb->len);
+		else
+			set_page_private(page, EXTENT_PAGE_PRIVATE);
+
+		/*
+		 * if we're on the last page or the first page and the
+		 * block isn't aligned on a page boundary, do extra checks
+		 * to make sure we don't clean a page that is partially dirty
+		 */
+		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+		    ((i == num_pages - 1) &&
+		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+			start = (u64)page->index << PAGE_CACHE_SHIFT;
+			end  = start + PAGE_CACHE_SIZE - 1;
+			if (test_range_bit(tree, start, end,
+					   EXTENT_DIRTY, 0)) {
+				unlock_page(page);
+				continue;
+			}
+		}
+		clear_page_dirty_for_io(page);
+		spin_lock_irq(&page->mapping->tree_lock);
+		if (!PageDirty(page)) {
+			radix_tree_tag_clear(&page->mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_DIRTY);
+		}
+		spin_unlock_irq(&page->mapping->tree_lock);
+		unlock_page(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(clear_extent_buffer_dirty);
+
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+				    struct extent_buffer *eb)
+{
+	return wait_on_extent_writeback(tree, eb->start,
+					eb->start + eb->len - 1);
+}
+EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
+
+int set_extent_buffer_dirty(struct extent_io_tree *tree,
+			     struct extent_buffer *eb)
+{
+	unsigned long i;
+	unsigned long num_pages;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = extent_buffer_page(eb, i);
+		/* writepage may need to do something special for the
+		 * first page, so we have to make sure page->private is
+		 * properly set.  releasepage may drop page->private
+		 * on us if the page isn't already dirty.
+		 */
+		lock_page(page);
+		if (i == 0) {
+			set_page_extent_head(page, eb->len);
+		} else if (PagePrivate(page) &&
+			   page->private != EXTENT_PAGE_PRIVATE) {
+			set_page_extent_mapped(page);
+		}
+		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+		set_extent_dirty(tree, page_offset(page),
+				 page_offset(page) + PAGE_CACHE_SIZE -1,
+				 GFP_NOFS);
+		unlock_page(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(set_extent_buffer_dirty);
+
+int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
+				struct extent_buffer *eb)
+{
+	unsigned long i;
+	struct page *page;
+	unsigned long num_pages;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	eb->flags &= ~EXTENT_UPTODATE;
+
+	clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+			      GFP_NOFS);
+	for (i = 0; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		if (page)
+			ClearPageUptodate(page);
+	}
+	return 0;
+}
+
+int set_extent_buffer_uptodate(struct extent_io_tree *tree,
+				struct extent_buffer *eb)
+{
+	unsigned long i;
+	struct page *page;
+	unsigned long num_pages;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+
+	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+			    GFP_NOFS);
+	for (i = 0; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
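+		/* partial first/last pages are not fully covered by this
+		 * buffer, so let check_page_uptodate decide, the same
+		 * caution clear_extent_buffer_dirty takes above
+		 */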
+		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+		    ((i == num_pages - 1) &&
+		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+			check_page_uptodate(tree, page);
+			continue;
+		}
+		SetPageUptodate(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(set_extent_buffer_uptodate);
+
+int extent_range_uptodate(struct extent_io_tree *tree,
+			  u64 start, u64 end)
+{
+	struct page *page;
+	int ret;
+	int pg_uptodate = 1;
+	int uptodate;
+	unsigned long index;
+
+	ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+	if (ret)
+		return 1;
+	while(start <= end) {
+		index = start >> PAGE_CACHE_SHIFT;
+		page = find_get_page(tree->mapping, index);
+		if (!page) {
+			pg_uptodate = 0;
+			break;
+		}
+		uptodate = PageUptodate(page);
+		page_cache_release(page);
+		if (!uptodate) {
+			pg_uptodate = 0;
+			break;
+		}
+		start += PAGE_CACHE_SIZE;
+	}
+	return pg_uptodate;
+}
+
+int extent_buffer_uptodate(struct extent_io_tree *tree,
+			   struct extent_buffer *eb)
+{
+	int ret = 0;
+	unsigned long num_pages;
+	unsigned long i;
+	struct page *page;
+	int pg_uptodate = 1;
+
+	if (eb->flags & EXTENT_UPTODATE)
+		return 1;
+
+	ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+			   EXTENT_UPTODATE, 1);
+	if (ret)
+		return ret;
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = 0; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		if (!PageUptodate(page)) {
+			pg_uptodate = 0;
+			break;
+		}
+	}
+	return pg_uptodate;
+}
+EXPORT_SYMBOL(extent_buffer_uptodate);
+
+int read_extent_buffer_pages(struct extent_io_tree *tree,
+			     struct extent_buffer *eb,
+			     u64 start, int wait,
+			     get_extent_t *get_extent, int mirror_num)
+{
+	unsigned long i;
+	unsigned long start_i;
+	struct page *page;
+	int err;
+	int ret = 0;
+	int locked_pages = 0;
+	int all_uptodate = 1;
+	int inc_all_pages = 0;
+	unsigned long num_pages;
+	struct bio *bio = NULL;
+
+	if (eb->flags & EXTENT_UPTODATE)
+		return 0;
+
+	if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+			   EXTENT_UPTODATE, 1)) {
+		return 0;
+	}
+
+	if (start) {
+		WARN_ON(start < eb->start);
+		start_i = (start >> PAGE_CACHE_SHIFT) -
+			(eb->start >> PAGE_CACHE_SHIFT);
+	} else {
+		start_i = 0;
+	}
+
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = start_i; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		if (!wait) {
+			if (!trylock_page(page))
+				goto unlock_exit;
+		} else {
+			lock_page(page);
+		}
+		locked_pages++;
+		if (!PageUptodate(page)) {
+			all_uptodate = 0;
+		}
+	}
+	if (all_uptodate) {
+		if (start_i == 0)
+			eb->flags |= EXTENT_UPTODATE;
+		if (ret) {
+			printk("all up to date but ret is %d\n", ret);
+		}
+		goto unlock_exit;
+	}
+
+	for (i = start_i; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		if (inc_all_pages)
+			page_cache_get(page);
+		if (!PageUptodate(page)) {
+			if (start_i == 0)
+				inc_all_pages = 1;
+			ClearPageError(page);
+			err = __extent_read_full_page(tree, page,
+						      get_extent, &bio,
+						      mirror_num);
+			if (err) {
+				ret = err;
+				printk("err %d from __extent_read_full_page\n", ret);
+			}
+		} else {
+			unlock_page(page);
+		}
+	}
+
+	if (bio)
+		submit_one_bio(READ, bio, mirror_num);
+
+	if (ret || !wait) {
+		if (ret)
+			printk("ret %d wait %d returning\n", ret, wait);
+		return ret;
+	}
+	for (i = start_i; i < num_pages; i++) {
+		page = extent_buffer_page(eb, i);
+		wait_on_page_locked(page);
+		if (!PageUptodate(page)) {
+			printk("page not uptodate after wait_on_page_locked\n");
+			ret = -EIO;
+		}
+	}
+	if (!ret)
+		eb->flags |= EXTENT_UPTODATE;
+	return ret;
+
+unlock_exit:
+	i = start_i;
+	while(locked_pages > 0) {
+		page = extent_buffer_page(eb, i);
+		i++;
+		unlock_page(page);
+		locked_pages--;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(read_extent_buffer_pages);
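Together with alloc_extent_buffer() and free_extent_buffer(), the usual
blocking read of a metadata block looks roughly like the sketch below
(illustrative only; the get_extent callback is whatever the caller provides):

	static struct extent_buffer *example_read_block(struct extent_io_tree *tree,
							u64 start, unsigned long len,
							get_extent_t *get_extent)
	{
		struct extent_buffer *eb;
		int err;

		eb = alloc_extent_buffer(tree, start, len, NULL, GFP_NOFS);
		if (!eb)
			return NULL;

		/* wait == 1: block until the pages are read and checked */
		err = read_extent_buffer_pages(tree, eb, 0, 1, get_extent, 0);
		if (err) {
			free_extent_buffer(eb);
			return NULL;
		}
		return eb;
	}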
+
+void read_extent_buffer(struct extent_buffer *eb, void *dstv,
+			unsigned long start,
+			unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char *dst = (char *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = kmap_atomic(page, KM_USER1);
+		memcpy(dst, kaddr + offset, cur);
+		kunmap_atomic(kaddr, KM_USER1);
+
+		dst += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(read_extent_buffer);
+
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
+			       unsigned long min_len, char **token, char **map,
+			       unsigned long *map_start,
+			       unsigned long *map_len, int km)
+{
+	size_t offset = start & (PAGE_CACHE_SIZE - 1);
+	char *kaddr;
+	struct page *p;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	unsigned long end_i = (start_offset + start + min_len - 1) >>
+		PAGE_CACHE_SHIFT;
+
+	if (i != end_i)
+		return -EINVAL;
+
+	if (i == 0) {
+		offset = start_offset;
+		*map_start = 0;
+	} else {
+		offset = 0;
+		*map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
+	}
+	if (start + min_len > eb->len) {
+		printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n",
+		       eb->start, eb->len, start, min_len);
+		WARN_ON(1);
+	}
+
+	p = extent_buffer_page(eb, i);
+	kaddr = kmap_atomic(p, km);
+	*token = kaddr;
+	*map = kaddr + offset;
+	*map_len = PAGE_CACHE_SIZE - offset;
+	return 0;
+}
+EXPORT_SYMBOL(map_private_extent_buffer);
+
+int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+		      unsigned long min_len,
+		      char **token, char **map,
+		      unsigned long *map_start,
+		      unsigned long *map_len, int km)
+{
+	int err;
+	int save = 0;
+	if (eb->map_token) {
+		unmap_extent_buffer(eb, eb->map_token, km);
+		eb->map_token = NULL;
+		save = 1;
+	}
+	err = map_private_extent_buffer(eb, start, min_len, token, map,
+				       map_start, map_len, km);
+	if (!err && save) {
+		eb->map_token = *token;
+		eb->kaddr = *map;
+		eb->map_start = *map_start;
+		eb->map_len = *map_len;
+	}
+	return err;
+}
+EXPORT_SYMBOL(map_extent_buffer);
+
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
+{
+	kunmap_atomic(token, km);
+}
+EXPORT_SYMBOL(unmap_extent_buffer);
+
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+			  unsigned long start,
+			  unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char *ptr = (char *)ptrv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		page = extent_buffer_page(eb, i);
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+
+		kaddr = kmap_atomic(page, KM_USER0);
+		ret = memcmp(ptr, kaddr + offset, cur);
+		kunmap_atomic(kaddr, KM_USER0);
+		if (ret)
+			break;
+
+		ptr += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(memcmp_extent_buffer);
+
+void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
+			 unsigned long start, unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char *src = (char *)srcv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		page = extent_buffer_page(eb, i);
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, PAGE_CACHE_SIZE - offset);
+		kaddr = kmap_atomic(page, KM_USER1);
+		memcpy(kaddr + offset, src, cur);
+		kunmap_atomic(kaddr, KM_USER1);
+
+		src += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(write_extent_buffer);
+
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	WARN_ON(start + len > eb->start + eb->len);
+
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		page = extent_buffer_page(eb, i);
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, PAGE_CACHE_SIZE - offset);
+		kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr + offset, c, cur);
+		kunmap_atomic(kaddr, KM_USER0);
+
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(memset_extent_buffer);
+
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len)
+{
+	u64 dst_len = dst->len;
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(src->len != dst_len);
+
+	offset = (start_offset + dst_offset) &
+		((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while(len > 0) {
+		page = extent_buffer_page(dst, i);
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
+
+		kaddr = kmap_atomic(page, KM_USER0);
+		read_extent_buffer(src, kaddr + offset, src_offset, cur);
+		kunmap_atomic(kaddr, KM_USER0);
+
+		src_offset += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(copy_extent_buffer);
+
+static void move_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+	if (dst_page == src_page) {
+		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+	} else {
+		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+		char *p = dst_kaddr + dst_off + len;
+		char *s = src_kaddr + src_off + len;
+
+		while (len--)
+			*--p = *--s;
+
+		kunmap_atomic(src_kaddr, KM_USER1);
+	}
+	kunmap_atomic(dst_kaddr, KM_USER0);
+}
+
+static void copy_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+	char *src_kaddr;
+
+	if (dst_page != src_page)
+		src_kaddr = kmap_atomic(src_page, KM_USER1);
+	else
+		src_kaddr = dst_kaddr;
+
+	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+	kunmap_atomic(dst_kaddr, KM_USER0);
+	if (dst_page != src_page)
+		kunmap_atomic(src_kaddr, KM_USER1);
+}
+
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	size_t cur;
+	size_t dst_off_in_page;
+	size_t src_off_in_page;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long dst_i;
+	unsigned long src_i;
+
+	if (src_offset + len > dst->len) {
+		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+		       src_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (dst_offset + len > dst->len) {
+		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+		       dst_offset, len, dst->len);
+		BUG_ON(1);
+	}
+
+	while(len > 0) {
+		dst_off_in_page = (start_offset + dst_offset) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+		src_off_in_page = (start_offset + src_offset) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+
+		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+
+		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+					       src_off_in_page));
+		cur = min_t(unsigned long, cur,
+			(unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
+
+		copy_pages(extent_buffer_page(dst, dst_i),
+			   extent_buffer_page(dst, src_i),
+			   dst_off_in_page, src_off_in_page, cur);
+
+		src_offset += cur;
+		dst_offset += cur;
+		len -= cur;
+	}
+}
+EXPORT_SYMBOL(memcpy_extent_buffer);
+
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	size_t cur;
+	size_t dst_off_in_page;
+	size_t src_off_in_page;
+	unsigned long dst_end = dst_offset + len - 1;
+	unsigned long src_end = src_offset + len - 1;
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long dst_i;
+	unsigned long src_i;
+
+	if (src_offset + len > dst->len) {
+		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+		       src_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (dst_offset + len > dst->len) {
+		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+		       dst_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (dst_offset < src_offset) {
+		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
+		return;
+	}
+	while(len > 0) {
+		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
+		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+
+		dst_off_in_page = (start_offset + dst_end) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+		src_off_in_page = (start_offset + src_end) &
+			((unsigned long)PAGE_CACHE_SIZE - 1);
+
+		cur = min_t(unsigned long, len, src_off_in_page + 1);
+		cur = min(cur, dst_off_in_page + 1);
+		move_pages(extent_buffer_page(dst, dst_i),
+			   extent_buffer_page(dst, src_i),
+			   dst_off_in_page - cur + 1,
+			   src_off_in_page - cur + 1, cur);
+
+		dst_end -= cur;
+		src_end -= cur;
+		len -= cur;
+	}
+}
+EXPORT_SYMBOL(memmove_extent_buffer);
+
+int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+{
+	u64 start = page_offset(page);
+	struct extent_buffer *eb;
+	int ret = 1;
+	unsigned long i;
+	unsigned long num_pages;
+
+	spin_lock(&tree->buffer_lock);
+	eb = buffer_search(tree, start);
+	if (!eb)
+		goto out;
+
+	if (atomic_read(&eb->refs) > 1) {
+		ret = 0;
+		goto out;
+	}
+	/* at this point we can safely release the extent buffer */
+	num_pages = num_extent_pages(eb->start, eb->len);
+	for (i = 0; i < num_pages; i++)
+		page_cache_release(extent_buffer_page(eb, i));
+	rb_erase(&eb->rb_node, &tree->buffer);
+	__free_extent_buffer(eb);
+out:
+	spin_unlock(&tree->buffer_lock);
+	return ret;
+}
+EXPORT_SYMBOL(try_release_extent_buffer);

+ 248 - 0
fs/btrfs/extent_io.h

@@ -0,0 +1,248 @@
+#ifndef __EXTENTIO__
+#define __EXTENTIO__
+
+#include <linux/rbtree.h>
+
+/* bits for the extent state */
+#define EXTENT_DIRTY 1
+#define EXTENT_WRITEBACK (1 << 1)
+#define EXTENT_UPTODATE (1 << 2)
+#define EXTENT_LOCKED (1 << 3)
+#define EXTENT_NEW (1 << 4)
+#define EXTENT_DELALLOC (1 << 5)
+#define EXTENT_DEFRAG (1 << 6)
+#define EXTENT_DEFRAG_DONE (1 << 7)
+#define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_ORDERED (1 << 9)
+#define EXTENT_ORDERED_METADATA (1 << 10)
+#define EXTENT_BOUNDARY (1 << 11)
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+/*
+ * page->private values.  Every page that is controlled by the extent
+ * map has page->private set to one.
+ */
+#define EXTENT_PAGE_PRIVATE 1
+#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
+
+struct extent_state;
+
+typedef	int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
+				       struct bio *bio, int mirror_num);
+struct extent_io_ops {
+	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
+	int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
+	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
+	extent_submit_bio_hook_t *submit_bio_hook;
+	int (*merge_bio_hook)(struct page *page, unsigned long offset,
+			      size_t size, struct bio *bio);
+	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
+	int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
+				       u64 start, u64 end,
+				       struct extent_state *state);
+	int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
+					u64 start, u64 end,
+				       struct extent_state *state);
+	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
+				    struct extent_state *state);
+	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
+				      struct extent_state *state, int uptodate);
+	int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
+			    unsigned long old, unsigned long bits);
+	int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
+			    unsigned long old, unsigned long bits);
+	int (*write_cache_pages_lock_hook)(struct page *page);
+};
+
+struct extent_io_tree {
+	struct rb_root state;
+	struct rb_root buffer;
+	struct address_space *mapping;
+	u64 dirty_bytes;
+	spinlock_t lock;
+	spinlock_t buffer_lock;
+	struct extent_io_ops *ops;
+};
+
+struct extent_state {
+	u64 start;
+	u64 end; /* inclusive */
+	struct rb_node rb_node;
+	struct extent_io_tree *tree;
+	wait_queue_head_t wq;
+	atomic_t refs;
+	unsigned long state;
+
+	/* for use by the FS */
+	u64 private;
+
+	struct list_head leak_list;
+};
+
+struct extent_buffer {
+	u64 start;
+	unsigned long len;
+	char *map_token;
+	char *kaddr;
+	unsigned long map_start;
+	unsigned long map_len;
+	struct page *first_page;
+	atomic_t refs;
+	int flags;
+	struct list_head leak_list;
+	struct rb_node rb_node;
+	struct mutex mutex;
+};
+
+struct extent_map_tree;
+
+static inline struct extent_state *extent_state_next(struct extent_state *state)
+{
+	struct rb_node *node;
+	node = rb_next(&state->rb_node);
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct extent_state, rb_node);
+}
+
+typedef struct extent_map *(get_extent_t)(struct inode *inode,
+					  struct page *page,
+					  size_t page_offset,
+					  u64 start, u64 len,
+					  int create);
+
+void extent_io_tree_init(struct extent_io_tree *tree,
+			  struct address_space *mapping, gfp_t mask);
+int try_release_extent_mapping(struct extent_map_tree *map,
+			       struct extent_io_tree *tree, struct page *page,
+			       gfp_t mask);
+int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page);
+int try_release_extent_state(struct extent_map_tree *map,
+			     struct extent_io_tree *tree, struct page *page,
+			     gfp_t mask);
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+			  get_extent_t *get_extent);
+int __init extent_io_init(void);
+void extent_io_exit(void);
+
+u64 count_range_bits(struct extent_io_tree *tree,
+		     u64 *start, u64 search_end,
+		     u64 max_bytes, unsigned long bits);
+
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int filled);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+		      int bits, gfp_t mask);
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		     int bits, int wake, int delete, gfp_t mask);
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+		    int bits, gfp_t mask);
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			gfp_t mask);
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+		   gfp_t mask);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask);
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask);
+int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
+				  u64 end, gfp_t mask);
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask);
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask);
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			  u64 *start_ret, u64 *end_ret, int bits);
+struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
+						 u64 start, int bits);
+int extent_invalidatepage(struct extent_io_tree *tree,
+			  struct page *page, unsigned long offset);
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+			  get_extent_t *get_extent,
+			  struct writeback_control *wbc);
+int extent_writepages(struct extent_io_tree *tree,
+		      struct address_space *mapping,
+		      get_extent_t *get_extent,
+		      struct writeback_control *wbc);
+int extent_readpages(struct extent_io_tree *tree,
+		     struct address_space *mapping,
+		     struct list_head *pages, unsigned nr_pages,
+		     get_extent_t get_extent);
+int extent_prepare_write(struct extent_io_tree *tree,
+			 struct inode *inode, struct page *page,
+			 unsigned from, unsigned to, get_extent_t *get_extent);
+int extent_commit_write(struct extent_io_tree *tree,
+			struct inode *inode, struct page *page,
+			unsigned from, unsigned to);
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+		get_extent_t *get_extent);
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
+void set_page_extent_mapped(struct page *page);
+
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+					  u64 start, unsigned long len,
+					  struct page *page0,
+					  gfp_t mask);
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+					 u64 start, unsigned long len,
+					  gfp_t mask);
+void free_extent_buffer(struct extent_buffer *eb);
+int read_extent_buffer_pages(struct extent_io_tree *tree,
+			     struct extent_buffer *eb, u64 start, int wait,
+			     get_extent_t *get_extent, int mirror_num);
+
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+	atomic_inc(&eb->refs);
+}
+
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+			  unsigned long start,
+			  unsigned long len);
+void read_extent_buffer(struct extent_buffer *eb, void *dst,
+			unsigned long start,
+			unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+			 unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len);
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len);
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+				    struct extent_buffer *eb);
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+			      struct extent_buffer *eb);
+int set_extent_buffer_dirty(struct extent_io_tree *tree,
+			     struct extent_buffer *eb);
+int set_extent_buffer_uptodate(struct extent_io_tree *tree,
+			       struct extent_buffer *eb);
+int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
+				struct extent_buffer *eb);
+int extent_buffer_uptodate(struct extent_io_tree *tree,
+			   struct extent_buffer *eb);
+int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+		      unsigned long min_len, char **token, char **map,
+		      unsigned long *map_start,
+		      unsigned long *map_len, int km);
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+		      unsigned long min_len, char **token, char **map,
+		      unsigned long *map_start,
+		      unsigned long *map_len, int km);
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+int release_extent_buffer_tail_pages(struct extent_buffer *eb);
+int extent_range_uptodate(struct extent_io_tree *tree,
+			  u64 start, u64 end);
+#endif
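The extent_io_ops table declared above is how a filesystem wires per-page
callbacks into this code; hooks it does not need are simply left NULL.  A
minimal sketch (hook and table names are made up for illustration), with the
caller pointing tree->ops at the table after extent_io_tree_init():

	static int example_readpage_end_io_hook(struct page *page, u64 start,
						u64 end,
						struct extent_state *state)
	{
		/* a real hook would verify checksums for [start, end] here */
		return 0;
	}

	static struct extent_io_ops example_extent_io_ops = {
		.readpage_end_io_hook = example_readpage_end_io_hook,
	};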

+ 342 - 0
fs/btrfs/extent_map.c

@@ -0,0 +1,342 @@
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/version.h>
+#include <linux/hardirq.h>
+#include "extent_map.h"
+
+/* temporary define until extent_map moves out of btrfs */
+struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+				       unsigned long extra_flags,
+				       void (*ctor)(void *, struct kmem_cache *,
+						    unsigned long));
+
+static struct kmem_cache *extent_map_cache;
+
+int __init extent_map_init(void)
+{
+	extent_map_cache = btrfs_cache_create("extent_map",
+					    sizeof(struct extent_map), 0,
+					    NULL);
+	if (!extent_map_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void extent_map_exit(void)
+{
+	if (extent_map_cache)
+		kmem_cache_destroy(extent_map_cache);
+}
+
+/**
+ * extent_map_tree_init - initialize extent map tree
+ * @tree:		tree to initialize
+ * @mask:		flags for memory allocations during tree operations
+ *
+ * Initialize the extent tree @tree.  Should be called for each new inode
+ * or other user of the extent_map interface.
+ */
+void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
+{
+	tree->map.rb_node = NULL;
+	spin_lock_init(&tree->lock);
+}
+EXPORT_SYMBOL(extent_map_tree_init);
+
+/**
+ * alloc_extent_map - allocate new extent map structure
+ * @mask:	memory allocation flags
+ *
+ * Allocate a new extent_map structure.  The new structure is
+ * returned with a reference count of one and needs to be
+ * freed using free_extent_map()
+ */
+struct extent_map *alloc_extent_map(gfp_t mask)
+{
+	struct extent_map *em;
+	em = kmem_cache_alloc(extent_map_cache, mask);
+	if (!em || IS_ERR(em))
+		return em;
+	em->in_tree = 0;
+	em->flags = 0;
+	atomic_set(&em->refs, 1);
+	return em;
+}
+EXPORT_SYMBOL(alloc_extent_map);
+
+/**
+ * free_extent_map - drop reference count of an extent_map
+ * @em:		extent map being released
+ *
+ * Drops the reference count on @em by one and frees the structure
+ * if the reference count hits zero.
+ */
+void free_extent_map(struct extent_map *em)
+{
+	if (!em)
+		return;
+	WARN_ON(atomic_read(&em->refs) == 0);
+	if (atomic_dec_and_test(&em->refs)) {
+		WARN_ON(em->in_tree);
+		kmem_cache_free(extent_map_cache, em);
+	}
+}
+EXPORT_SYMBOL(free_extent_map);
+
+static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
+				   struct rb_node *node)
+{
+	struct rb_node ** p = &root->rb_node;
+	struct rb_node * parent = NULL;
+	struct extent_map *entry;
+
+	while(*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct extent_map, rb_node);
+
+		WARN_ON(!entry->in_tree);
+
+		if (offset < entry->start)
+			p = &(*p)->rb_left;
+		else if (offset >= extent_map_end(entry))
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	entry = rb_entry(node, struct extent_map, rb_node);
+	entry->in_tree = 1;
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+/*
+ * search through the tree for an extent_map with a given offset.  If
+ * it can't be found, try to find some neighboring extents
+ */
+static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
+				     struct rb_node **prev_ret,
+				     struct rb_node **next_ret)
+{
+	struct rb_node * n = root->rb_node;
+	struct rb_node *prev = NULL;
+	struct rb_node *orig_prev = NULL;
+	struct extent_map *entry;
+	struct extent_map *prev_entry = NULL;
+
+	while(n) {
+		entry = rb_entry(n, struct extent_map, rb_node);
+		prev = n;
+		prev_entry = entry;
+
+		WARN_ON(!entry->in_tree);
+
+		if (offset < entry->start)
+			n = n->rb_left;
+		else if (offset >= extent_map_end(entry))
+			n = n->rb_right;
+		else
+			return n;
+	}
+
+	if (prev_ret) {
+		orig_prev = prev;
+		while(prev && offset >= extent_map_end(prev_entry)) {
+			prev = rb_next(prev);
+			prev_entry = rb_entry(prev, struct extent_map, rb_node);
+		}
+		*prev_ret = prev;
+		prev = orig_prev;
+	}
+
+	if (next_ret) {
+		prev_entry = rb_entry(prev, struct extent_map, rb_node);
+		while(prev && offset < prev_entry->start) {
+			prev = rb_prev(prev);
+			prev_entry = rb_entry(prev, struct extent_map, rb_node);
+		}
+		*next_ret = prev;
+	}
+	return NULL;
+}
+
+/*
+ * look for an offset in the tree, and if it can't be found, return
+ * the first offset we can find smaller than 'offset'.
+ */
+static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
+{
+	struct rb_node *prev;
+	struct rb_node *ret;
+	ret = __tree_search(root, offset, &prev, NULL);
+	if (!ret)
+		return prev;
+	return ret;
+}
+
+/* check to see if two extent_map structs are adjacent and safe to merge */
+static int mergable_maps(struct extent_map *prev, struct extent_map *next)
+{
+	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
+		return 0;
+
+	if (extent_map_end(prev) == next->start &&
+	    prev->flags == next->flags &&
+	    prev->bdev == next->bdev &&
+	    ((next->block_start == EXTENT_MAP_HOLE &&
+	      prev->block_start == EXTENT_MAP_HOLE) ||
+	     (next->block_start == EXTENT_MAP_INLINE &&
+	      prev->block_start == EXTENT_MAP_INLINE) ||
+	     (next->block_start == EXTENT_MAP_DELALLOC &&
+	      prev->block_start == EXTENT_MAP_DELALLOC) ||
+	     (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
+	      next->block_start == extent_map_block_end(prev)))) {
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * add_extent_mapping - add new extent map to the extent tree
+ * @tree:	tree to insert new map in
+ * @em:		map to insert
+ *
+ * Insert @em into @tree or perform a simple forward/backward merge with
+ * existing mappings.  The extent_map struct passed in will be inserted
+ * into the tree directly, with an additional reference taken, or a
+ * reference dropped if the merge attempt was successful.
+ */
+int add_extent_mapping(struct extent_map_tree *tree,
+		       struct extent_map *em)
+{
+	int ret = 0;
+	struct extent_map *merge = NULL;
+	struct rb_node *rb;
+	struct extent_map *exist;
+
+	exist = lookup_extent_mapping(tree, em->start, em->len);
+	if (exist) {
+		free_extent_map(exist);
+		ret = -EEXIST;
+		goto out;
+	}
+	assert_spin_locked(&tree->lock);
+	rb = tree_insert(&tree->map, em->start, &em->rb_node);
+	if (rb) {
+		ret = -EEXIST;
+		free_extent_map(merge);
+		goto out;
+	}
+	atomic_inc(&em->refs);
+	if (em->start != 0) {
+		rb = rb_prev(&em->rb_node);
+		if (rb)
+			merge = rb_entry(rb, struct extent_map, rb_node);
+		if (rb && mergable_maps(merge, em)) {
+			em->start = merge->start;
+			em->len += merge->len;
+			em->block_start = merge->block_start;
+			merge->in_tree = 0;
+			rb_erase(&merge->rb_node, &tree->map);
+			free_extent_map(merge);
+		}
+	}
+	rb = rb_next(&em->rb_node);
+	if (rb)
+		merge = rb_entry(rb, struct extent_map, rb_node);
+	if (rb && mergable_maps(em, merge)) {
+		em->len += merge->len;
+		rb_erase(&merge->rb_node, &tree->map);
+		merge->in_tree = 0;
+		free_extent_map(merge);
+	}
+out:
+	return ret;
+}
+EXPORT_SYMBOL(add_extent_mapping);
+
+/* simple helper to do math around the end of an extent, handling wrap */
+static u64 range_end(u64 start, u64 len)
+{
+	if (start + len < start)
+		return (u64)-1;
+	return start + len;
+}
+
+/**
+ * lookup_extent_mapping - lookup extent_map
+ * @tree:	tree to lookup in
+ * @start:	byte offset to start the search
+ * @len:	length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, len] range.  There may be additional objects in the tree that
+ * intersect, so check the object returned carefully to make sure that no
+ * additional lookups are needed.
+ */
+struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
+					 u64 start, u64 len)
+{
+	struct extent_map *em;
+	struct rb_node *rb_node;
+	struct rb_node *prev = NULL;
+	struct rb_node *next = NULL;
+	u64 end = range_end(start, len);
+
+	assert_spin_locked(&tree->lock);
+	rb_node = __tree_search(&tree->map, start, &prev, &next);
+	if (!rb_node && prev) {
+		em = rb_entry(prev, struct extent_map, rb_node);
+		if (end > em->start && start < extent_map_end(em))
+			goto found;
+	}
+	if (!rb_node && next) {
+		em = rb_entry(next, struct extent_map, rb_node);
+		if (end > em->start && start < extent_map_end(em))
+			goto found;
+	}
+	if (!rb_node) {
+		em = NULL;
+		goto out;
+	}
+	if (IS_ERR(rb_node)) {
+		em = ERR_PTR(PTR_ERR(rb_node));
+		goto out;
+	}
+	em = rb_entry(rb_node, struct extent_map, rb_node);
+	if (end > em->start && start < extent_map_end(em))
+		goto found;
+
+	em = NULL;
+	goto out;
+
+found:
+	atomic_inc(&em->refs);
+out:
+	return em;
+}
+EXPORT_SYMBOL(lookup_extent_mapping);
+
+/**
+ * remove_extent_mapping - removes an extent_map from the extent tree
+ * @tree:	extent tree to remove from
+ * @em:		extent map being removed
+ *
+ * Removes @em from @tree.  No reference counts are dropped, and no checks
+ * are done to see if the range is in use
+ */
+int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
+{
+	int ret = 0;
+
+	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
+	assert_spin_locked(&tree->lock);
+	rb_erase(&em->rb_node, &tree->map);
+	em->in_tree = 0;
+	return ret;
+}
+EXPORT_SYMBOL(remove_extent_mapping);
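The tree-modifying calls above (add, lookup, remove) all expect the caller to
hold tree->lock, hence the assert_spin_locked() checks.  A minimal sketch of
inserting a new mapping (illustrative only):

	static struct extent_map *example_add_mapping(struct extent_map_tree *tree,
						      u64 start, u64 len,
						      u64 block_start)
	{
		struct extent_map *em;
		int ret;

		em = alloc_extent_map(GFP_NOFS);
		if (!em)
			return NULL;
		em->start = start;
		em->len = len;
		em->block_start = block_start;
		em->bdev = NULL;	/* a real caller sets the backing device */

		spin_lock(&tree->lock);
		ret = add_extent_mapping(tree, em);	/* takes its own reference */
		spin_unlock(&tree->lock);
		if (ret) {		/* -EEXIST on overlap */
			free_extent_map(em);
			return NULL;
		}
		return em;		/* caller still owns its reference */
	}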

+ 57 - 0
fs/btrfs/extent_map.h

@@ -0,0 +1,57 @@
+#ifndef __EXTENTMAP__
+#define __EXTENTMAP__
+
+#include <linux/rbtree.h>
+
+#define EXTENT_MAP_LAST_BYTE (u64)-4
+#define EXTENT_MAP_HOLE (u64)-3
+#define EXTENT_MAP_INLINE (u64)-2
+#define EXTENT_MAP_DELALLOC (u64)-1
+
+/* bits for the flags field */
+#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
+
+struct extent_map {
+	struct rb_node rb_node;
+
+	/* all of these are in bytes */
+	u64 start;
+	u64 len;
+	u64 block_start;
+	unsigned long flags;
+	struct block_device *bdev;
+	atomic_t refs;
+	int in_tree;
+};
+
+struct extent_map_tree {
+	struct rb_root map;
+	spinlock_t lock;
+};
+
+static inline u64 extent_map_end(struct extent_map *em)
+{
+	if (em->start + em->len < em->start)
+		return (u64)-1;
+	return em->start + em->len;
+}
+
+static inline u64 extent_map_block_end(struct extent_map *em)
+{
+	if (em->block_start + em->len < em->block_start)
+		return (u64)-1;
+	return em->block_start + em->len;
+}
+
+void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask);
+struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
+					 u64 start, u64 len);
+int add_extent_mapping(struct extent_map_tree *tree,
+		       struct extent_map *em);
+int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
+
+struct extent_map *alloc_extent_map(gfp_t mask);
+void free_extent_map(struct extent_map *em);
+int __init extent_map_init(void);
+void extent_map_exit(void);
+#endif

+ 512 - 0
fs/btrfs/file-item.c

@@ -0,0 +1,512 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/bio.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+
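+/*
+ * roughly how many crc32 checksums are allowed in a single csum item:
+ * the leaf data area, less room for item headers, divided by the size
+ * of one crc, with one slot of slack
+ */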
+#define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \
+			       sizeof(struct btrfs_item) * 2) / \
+			       BTRFS_CRC32_SIZE) - 1))
+int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     u64 objectid, u64 pos,
+			     u64 disk_offset, u64 disk_num_bytes,
+			     u64 num_bytes, u64 offset)
+{
+	int ret = 0;
+	struct btrfs_file_extent_item *item;
+	struct btrfs_key file_key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	file_key.objectid = objectid;
+	file_key.offset = pos;
+	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
+
+	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
+				      sizeof(*item));
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret);
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+			      struct btrfs_file_extent_item);
+	btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
+	btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
+	btrfs_set_file_extent_offset(leaf, item, offset);
+	btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
+	btrfs_set_file_extent_generation(leaf, item, trans->transid);
+	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 objectid, u64 offset,
+					  int cow)
+{
+	int ret;
+	struct btrfs_key file_key;
+	struct btrfs_key found_key;
+	struct btrfs_csum_item *item;
+	struct extent_buffer *leaf;
+	u64 csum_offset = 0;
+	int csums_in_item;
+
+	file_key.objectid = objectid;
+	file_key.offset = offset;
+	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+	ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
+	if (ret < 0)
+		goto fail;
+	leaf = path->nodes[0];
+	if (ret > 0) {
+		ret = 1;
+		if (path->slots[0] == 0)
+			goto fail;
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
+		    found_key.objectid != objectid) {
+			goto fail;
+		}
+		csum_offset = (offset - found_key.offset) >>
+				root->fs_info->sb->s_blocksize_bits;
+		csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
+		csums_in_item /= BTRFS_CRC32_SIZE;
+
+		if (csum_offset >= csums_in_item) {
+			ret = -EFBIG;
+			goto fail;
+		}
+	}
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
+	item = (struct btrfs_csum_item *)((unsigned char *)item +
+					  csum_offset * BTRFS_CRC32_SIZE);
+	return item;
+fail:
+	if (ret > 0)
+		ret = -ENOENT;
+	return ERR_PTR(ret);
+}
+
+
+int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_path *path, u64 objectid,
+			     u64 offset, int mod)
+{
+	int ret;
+	struct btrfs_key file_key;
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+
+	file_key.objectid = objectid;
+	file_key.offset = offset;
+	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
+	ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
+	return ret;
+}
+
+int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
+			  struct bio *bio)
+{
+	u32 sum;
+	struct bio_vec *bvec = bio->bi_io_vec;
+	int bio_index = 0;
+	u64 offset;
+	u64 item_start_offset = 0;
+	u64 item_last_offset = 0;
+	u32 diff;
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_csum_item *item = NULL;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+
+	path = btrfs_alloc_path();
+	if (bio->bi_size > PAGE_CACHE_SIZE * 8)
+		path->reada = 2;
+
+	WARN_ON(bio->bi_vcnt <= 0);
+
+	while(bio_index < bio->bi_vcnt) {
+		offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+		ret = btrfs_find_ordered_sum(inode, offset, &sum);
+		if (ret == 0)
+			goto found;
+
+		if (!item || offset < item_start_offset ||
+		    offset >= item_last_offset) {
+			struct btrfs_key found_key;
+			u32 item_size;
+
+			if (item)
+				btrfs_release_path(root, path);
+			item = btrfs_lookup_csum(NULL, root, path,
+						 inode->i_ino, offset, 0);
+			if (IS_ERR(item)) {
+				ret = PTR_ERR(item);
+				if (ret == -ENOENT || ret == -EFBIG)
+					ret = 0;
+				sum = 0;
+				printk("no csum found for inode %lu start "
+				       "%llu\n", inode->i_ino,
+				       (unsigned long long)offset);
+				item = NULL;
+				goto found;
+			}
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+					      path->slots[0]);
+
+			item_start_offset = found_key.offset;
+			item_size = btrfs_item_size_nr(path->nodes[0],
+						       path->slots[0]);
+			item_last_offset = item_start_offset +
+				(item_size / BTRFS_CRC32_SIZE) *
+				root->sectorsize;
+			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					      struct btrfs_csum_item);
+		}
+		/*
+		 * this byte range must be able to fit inside
+		 * a single leaf so it will also fit inside a u32
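+		 *
+		 * e.g. with a 4K sectorsize and 4 byte crcs, an offset
+		 * 12288 bytes past the start of the item's range is csum
+		 * number 3, found 12 bytes into the item data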
+		 */
+		diff = offset - item_start_offset;
+		diff = diff / root->sectorsize;
+		diff = diff * BTRFS_CRC32_SIZE;
+
+		read_extent_buffer(path->nodes[0], &sum,
+				   ((unsigned long)item) + diff,
+				   BTRFS_CRC32_SIZE);
+found:
+		set_state_private(io_tree, offset, sum);
+		bio_index++;
+		bvec++;
+	}
+	btrfs_free_path(path);
+	return 0;
+}
+
+int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
+		       struct bio *bio)
+{
+	struct btrfs_ordered_sum *sums;
+	struct btrfs_sector_sum *sector_sum;
+	struct btrfs_ordered_extent *ordered;
+	char *data;
+	struct bio_vec *bvec = bio->bi_io_vec;
+	int bio_index = 0;
+	unsigned long total_bytes = 0;
+	unsigned long this_sum_bytes = 0;
+	u64 offset;
+
+	WARN_ON(bio->bi_vcnt <= 0);
+	sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
+	if (!sums)
+		return -ENOMEM;
+
+	sector_sum = sums->sums;
+	sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+	sums->len = bio->bi_size;
+	INIT_LIST_HEAD(&sums->list);
+	ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
+	BUG_ON(!ordered);
+
+	while(bio_index < bio->bi_vcnt) {
+		offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+		if (offset >= ordered->file_offset + ordered->len ||
+		    offset < ordered->file_offset) {
+			unsigned long bytes_left;
+			sums->len = this_sum_bytes;
+			this_sum_bytes = 0;
+			btrfs_add_ordered_sum(inode, ordered, sums);
+			btrfs_put_ordered_extent(ordered);
+
+			bytes_left = bio->bi_size - total_bytes;
+
+			sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
+				       GFP_NOFS);
+			BUG_ON(!sums);
+			sector_sum = sums->sums;
+			sums->len = bytes_left;
+			sums->file_offset = offset;
+			ordered = btrfs_lookup_ordered_extent(inode,
+						      sums->file_offset);
+			BUG_ON(!ordered);
+		}
+
+		data = kmap_atomic(bvec->bv_page, KM_USER0);
+		sector_sum->sum = ~(u32)0;
+		sector_sum->sum = btrfs_csum_data(root,
+						  data + bvec->bv_offset,
+						  sector_sum->sum,
+						  bvec->bv_len);
+		kunmap_atomic(data, KM_USER0);
+		btrfs_csum_final(sector_sum->sum,
+				 (char *)&sector_sum->sum);
+		sector_sum->offset = page_offset(bvec->bv_page) +
+			bvec->bv_offset;
+
+		sector_sum++;
+		bio_index++;
+		total_bytes += bvec->bv_len;
+		this_sum_bytes += bvec->bv_len;
+		bvec++;
+	}
+	this_sum_bytes = 0;
+	btrfs_add_ordered_sum(inode, ordered, sums);
+	btrfs_put_ordered_extent(ordered);
+	return 0;
+}
+
+int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, struct inode *inode,
+			   struct btrfs_ordered_sum *sums)
+{
+	u64 objectid = inode->i_ino;
+	u64 offset;
+	int ret;
+	struct btrfs_key file_key;
+	struct btrfs_key found_key;
+	u64 next_offset;
+	u64 total_bytes = 0;
+	int found_next;
+	struct btrfs_path *path;
+	struct btrfs_csum_item *item;
+	struct btrfs_csum_item *item_end;
+	struct extent_buffer *leaf = NULL;
+	u64 csum_offset;
+	struct btrfs_sector_sum *sector_sum;
+	u32 nritems;
+	u32 ins_size;
+	char *eb_map;
+	char *eb_token;
+	unsigned long map_len;
+	unsigned long map_start;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	sector_sum = sums->sums;
+again:
+	next_offset = (u64)-1;
+	found_next = 0;
+	offset = sector_sum->offset;
+	file_key.objectid = objectid;
+	file_key.offset = offset;
+	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+
+	mutex_lock(&BTRFS_I(inode)->csum_mutex);
+	item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
+	if (!IS_ERR(item)) {
+		leaf = path->nodes[0];
+		ret = 0;
+		goto found;
+	}
+	ret = PTR_ERR(item);
+	if (ret == -EFBIG) {
+		u32 item_size;
+		/* we found one, but it isn't big enough yet */
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) {
+			/* already at max size, make a new one */
+			goto insert;
+		}
+	} else {
+		int slot = path->slots[0] + 1;
+		/* we didn't find a csum item, insert one */
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		if (path->slots[0] >= nritems - 1) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 1)
+				found_next = 1;
+			if (ret != 0)
+				goto insert;
+			slot = 0;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+		if (found_key.objectid != objectid ||
+		    found_key.type != BTRFS_CSUM_ITEM_KEY) {
+			found_next = 1;
+			goto insert;
+		}
+		next_offset = found_key.offset;
+		found_next = 1;
+		goto insert;
+	}
+
+	/*
+	 * at this point, we know the tree has an item, but it isn't big
+	 * enough yet to put our csum in.  Grow it
+	 */
+	btrfs_release_path(root, path);
+	ret = btrfs_search_slot(trans, root, &file_key, path,
+				BTRFS_CRC32_SIZE, 1);
+	if (ret < 0)
+		goto fail_unlock;
+	if (ret == 0) {
+		BUG();
+	}
+	if (path->slots[0] == 0) {
+		goto insert;
+	}
+	path->slots[0]--;
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	csum_offset = (offset - found_key.offset) >>
+			root->fs_info->sb->s_blocksize_bits;
+	if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
+	    found_key.objectid != objectid ||
+	    csum_offset >= MAX_CSUM_ITEMS(root)) {
+		goto insert;
+	}
+	if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
+	    BTRFS_CRC32_SIZE) {
+		u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE;
+		diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
+		if (diff != BTRFS_CRC32_SIZE)
+			goto insert;
+		ret = btrfs_extend_item(trans, root, path, diff);
+		BUG_ON(ret);
+		goto csum;
+	}
+
+insert:
+	btrfs_release_path(root, path);
+	csum_offset = 0;
+	if (found_next) {
+		u64 tmp = min((u64)i_size_read(inode), next_offset);
+		tmp -= offset & ~((u64)root->sectorsize -1);
+		tmp >>= root->fs_info->sb->s_blocksize_bits;
+		tmp = max((u64)1, tmp);
+		tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root));
+		ins_size = BTRFS_CRC32_SIZE * tmp;
+	} else {
+		ins_size = BTRFS_CRC32_SIZE;
+	}
+	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
+				      ins_size);
+	if (ret < 0)
+		goto fail_unlock;
+	if (ret != 0) {
+		WARN_ON(1);
+		goto fail_unlock;
+	}
+csum:
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
+	ret = 0;
+	item = (struct btrfs_csum_item *)((unsigned char *)item +
+					  csum_offset * BTRFS_CRC32_SIZE);
+found:
+	item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
+	item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
+				      btrfs_item_size_nr(leaf, path->slots[0]));
+	eb_token = NULL;
+	mutex_unlock(&BTRFS_I(inode)->csum_mutex);
+	cond_resched();
+next_sector:
+
+	if (!eb_token ||
+	   (unsigned long)item  + BTRFS_CRC32_SIZE >= map_start + map_len) {
+		int err;
+
+		if (eb_token)
+			unmap_extent_buffer(leaf, eb_token, KM_USER1);
+		eb_token = NULL;
+		err = map_private_extent_buffer(leaf, (unsigned long)item,
+						BTRFS_CRC32_SIZE,
+						&eb_token, &eb_map,
+						&map_start, &map_len, KM_USER1);
+		if (err)
+			eb_token = NULL;
+	}
+	if (eb_token) {
+		memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
+		       &sector_sum->sum, BTRFS_CRC32_SIZE);
+	} else {
+		write_extent_buffer(leaf, &sector_sum->sum,
+				    (unsigned long)item, BTRFS_CRC32_SIZE);
+	}
+
+	total_bytes += root->sectorsize;
+	sector_sum++;
+	if (total_bytes < sums->len) {
+		item = (struct btrfs_csum_item *)((char *)item +
+						  BTRFS_CRC32_SIZE);
+		if (item < item_end && offset + PAGE_CACHE_SIZE ==
+		    sector_sum->offset) {
+			offset = sector_sum->offset;
+			goto next_sector;
+		}
+	}
+	if (eb_token) {
+		unmap_extent_buffer(leaf, eb_token, KM_USER1);
+		eb_token = NULL;
+	}
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	cond_resched();
+	if (total_bytes < sums->len) {
+		btrfs_release_path(root, path);
+		goto again;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+
+fail_unlock:
+	mutex_unlock(&BTRFS_I(inode)->csum_mutex);
+	goto out;
+}
+
+int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, struct btrfs_path *path,
+			u64 isize)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf = path->nodes[0];
+	int slot = path->slots[0];
+	int ret;
+	u32 new_item_size;
+	u64 new_item_span;
+	u64 blocks;
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (isize <= key.offset)
+		return 0;
+	new_item_span = isize - key.offset;
+	blocks = (new_item_span + root->sectorsize - 1) >>
+		root->fs_info->sb->s_blocksize_bits;
+	new_item_size = blocks * BTRFS_CRC32_SIZE;
+	if (new_item_size >= btrfs_item_size_nr(leaf, slot))
+		return 0;
+	ret = btrfs_truncate_item(trans, root, path, new_item_size, 1);
+	BUG_ON(ret);
+	return ret;
+}

+ 1178 - 0
fs/btrfs/file.c

@@ -0,0 +1,1178 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/mpage.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "ioctl.h"
+#include "print-tree.h"
+#include "tree-log.h"
+#include "locking.h"
+#include "compat.h"
+
+
+/* simple helper to fault in pages and copy.  This should go away
+ * and be replaced with calls into generic code.
+ */
+static int noinline btrfs_copy_from_user(loff_t pos, int num_pages,
+					 int write_bytes,
+					 struct page **prepared_pages,
+					 const char __user * buf)
+{
+	long page_fault = 0;
+	int i;
+	int offset = pos & (PAGE_CACHE_SIZE - 1);
+
+	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
+		size_t count = min_t(size_t,
+				     PAGE_CACHE_SIZE - offset, write_bytes);
+		struct page *page = prepared_pages[i];
+		fault_in_pages_readable(buf, count);
+
+		/* Copy data from userspace to the current page */
+		kmap(page);
+		page_fault = __copy_from_user(page_address(page) + offset,
+					      buf, count);
+		/* Flush processor's dcache for this page */
+		flush_dcache_page(page);
+		kunmap(page);
+		buf += count;
+		write_bytes -= count;
+
+		if (page_fault)
+			break;
+	}
+	return page_fault ? -EFAULT : 0;
+}
+
+/*
+ * unlocks pages after btrfs_file_write is done with them
+ */
+static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages)
+{
+	size_t i;
+	for (i = 0; i < num_pages; i++) {
+		if (!pages[i])
+			break;
+		/* PageChecked is some magic used to find pages that have
+		 * been modified without going through btrfs_set_page_dirty.
+		 * Clear it here.
+		 */
+		ClearPageChecked(pages[i]);
+		unlock_page(pages[i]);
+		mark_page_accessed(pages[i]);
+		page_cache_release(pages[i]);
+	}
+}
+
+/* this does all the hard work of inserting an inline extent into
+ * the btree.  Any existing inline extent is extended to make room;
+ * otherwise a new inline extent item is inserted into the btree.
+ */
+static int noinline insert_inline_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root, struct inode *inode,
+				u64 offset, size_t size,
+				struct page **pages, size_t page_offset,
+				int num_pages)
+{
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	char *kaddr;
+	unsigned long ptr;
+	struct btrfs_file_extent_item *ei;
+	struct page *page;
+	u32 datasize;
+	int err = 0;
+	int ret;
+	int i;
+	ssize_t cur_size;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	btrfs_set_trans_block_group(trans, inode);
+
+	key.objectid = inode->i_ino;
+	key.offset = offset;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0) {
+		err = ret;
+		goto fail;
+	}
+	if (ret == 1) {
+		struct btrfs_key found_key;
+
+		if (path->slots[0] == 0)
+			goto insert;
+
+		path->slots[0]--;
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+		if (found_key.objectid != inode->i_ino)
+			goto insert;
+
+		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
+			goto insert;
+		ei = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, ei) !=
+		    BTRFS_FILE_EXTENT_INLINE) {
+			goto insert;
+		}
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		ret = 0;
+	}
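+	/*
+	 * ret == 0: the slot points at an existing inline extent that starts
+	 * at or before our offset.  Extend its item if the new data reaches
+	 * past its current end, and zero any gap between the old end of the
+	 * data and the start of the new write.
+	 */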
+	if (ret == 0) {
+		u32 found_size;
+		u64 found_end;
+
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, ei) !=
+		    BTRFS_FILE_EXTENT_INLINE) {
+			err = ret;
+			btrfs_print_leaf(root, leaf);
+			printk("found wasn't inline offset %Lu inode %lu\n",
+			       offset, inode->i_ino);
+			goto fail;
+		}
+		found_size = btrfs_file_extent_inline_len(leaf,
+					  btrfs_item_nr(leaf, path->slots[0]));
+		found_end = key.offset + found_size;
+
+		if (found_end < offset + size) {
+			btrfs_release_path(root, path);
+			ret = btrfs_search_slot(trans, root, &key, path,
+						offset + size - found_end, 1);
+			BUG_ON(ret != 0);
+
+			ret = btrfs_extend_item(trans, root, path,
+						offset + size - found_end);
+			if (ret) {
+				err = ret;
+				goto fail;
+			}
+			leaf = path->nodes[0];
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_file_extent_item);
+			inode_add_bytes(inode, offset + size - found_end);
+		}
+		if (found_end < offset) {
+			ptr = btrfs_file_extent_inline_start(ei) + found_size;
+			memset_extent_buffer(leaf, 0, ptr, offset - found_end);
+		}
+	} else {
+insert:
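+		/*
+		 * no existing inline extent we can extend was found; insert
+		 * a fresh inline item covering [key.offset, offset + size)
+		 */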
+		btrfs_release_path(root, path);
+		datasize = offset + size - key.offset;
+		inode_add_bytes(inode, datasize);
+		datasize = btrfs_file_extent_calc_inline_size(datasize);
+		ret = btrfs_insert_empty_item(trans, root, path, &key,
+					      datasize);
+		if (ret) {
+			err = ret;
+			printk("got bad ret %d\n", ret);
+			goto fail;
+		}
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		btrfs_set_file_extent_generation(leaf, ei, trans->transid);
+		btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
+	}
+	ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset;
+
+	cur_size = size;
+	i = 0;
+	while (size > 0) {
+		page = pages[i];
+		kaddr = kmap_atomic(page, KM_USER0);
+		cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size);
+		write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size);
+		kunmap_atomic(kaddr, KM_USER0);
+		page_offset = 0;
+		ptr += cur_size;
+		size -= cur_size;
+		if (i >= num_pages) {
+			printk("i %d num_pages %d\n", i, num_pages);
+		}
+		i++;
+	}
+	btrfs_mark_buffer_dirty(leaf);
+fail:
+	btrfs_free_path(path);
+	return err;
+}
+
+/*
+ * after copy_from_user, pages need to be dirtied and we need to make
+ * sure holes are created between the current EOF and the start of
+ * any next extents (if required).
+ *
+ * this also makes the decision about creating an inline extent vs
+ * doing real data extents, marking pages dirty and delalloc as required.
+ */
+static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct file *file,
+				   struct page **pages,
+				   size_t num_pages,
+				   loff_t pos,
+				   size_t write_bytes)
+{
+	int err = 0;
+	int i;
+	struct inode *inode = fdentry(file)->d_inode;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	u64 hint_byte;
+	u64 num_bytes;
+	u64 start_pos;
+	u64 end_of_last_block;
+	u64 end_pos = pos + write_bytes;
+	u64 inline_size;
+	int did_inline = 0;
+	loff_t isize = i_size_read(inode);
+
+	start_pos = pos & ~((u64)root->sectorsize - 1);
+	num_bytes = (write_bytes + pos - start_pos +
+		    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+	end_of_last_block = start_pos + num_bytes - 1;
+
+	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
+	trans = btrfs_join_transaction(root, 1);
+	if (!trans) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+	btrfs_set_trans_block_group(trans, inode);
+	hint_byte = 0;
+
+	if ((end_of_last_block & 4095) == 0) {
+		printk("strange end of last %Lu %zu %Lu\n", start_pos,
+		       write_bytes, end_of_last_block);
+	}
+	set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
+
+	/* FIXME...EIEIO, ENOSPC and more */
+	/* insert any holes we need to create */
+	if (isize < start_pos) {
+		u64 last_pos_in_file;
+		u64 hole_size;
+		u64 mask = root->sectorsize - 1;
+		last_pos_in_file = (isize + mask) & ~mask;
+		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
+		if (hole_size > 0) {
+			btrfs_wait_ordered_range(inode, last_pos_in_file,
+						 last_pos_in_file + hole_size);
+			mutex_lock(&BTRFS_I(inode)->extent_mutex);
+			err = btrfs_drop_extents(trans, root, inode,
+						 last_pos_in_file,
+						 last_pos_in_file + hole_size,
+						 last_pos_in_file,
+						 &hint_byte);
+			if (err)
+				goto failed;
+
+			err = btrfs_insert_file_extent(trans, root,
+						       inode->i_ino,
+						       last_pos_in_file,
+						       0, 0, hole_size, 0);
+			btrfs_drop_extent_cache(inode, last_pos_in_file,
+					last_pos_in_file + hole_size - 1, 0);
+			mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+			btrfs_check_file(root, inode);
+		}
+		if (err)
+			goto failed;
+	}
+
+	/*
+	 * decide whether to write the new bytes as regular (delalloc)
+	 * extents or to store the data inline in the btree
+	 */
+	inline_size = end_pos;
+	if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
+	    inline_size > root->fs_info->max_inline ||
+	    (inline_size & (root->sectorsize -1)) == 0 ||
+	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+		/* check for reserved extents on each page, we don't want
+		 * to reset the delalloc bit on things that already have
+		 * extents reserved.
+		 */
+		btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
+		for (i = 0; i < num_pages; i++) {
+			struct page *p = pages[i];
+			SetPageUptodate(p);
+			ClearPageChecked(p);
+			set_page_dirty(p);
+		}
+	} else {
+		u64 aligned_end;
+		/* step one, delete the existing extents in this range */
+		aligned_end = (pos + write_bytes + root->sectorsize - 1) &
+			~((u64)root->sectorsize - 1);
+		mutex_lock(&BTRFS_I(inode)->extent_mutex);
+		err = btrfs_drop_extents(trans, root, inode, start_pos,
+					 aligned_end, aligned_end, &hint_byte);
+		if (err)
+			goto failed;
+		if (isize > inline_size)
+			inline_size = min_t(u64, isize, aligned_end);
+		inline_size -= start_pos;
+		err = insert_inline_extent(trans, root, inode, start_pos,
+					   inline_size, pages, 0, num_pages);
+		btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0);
+		BUG_ON(err);
+		mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+
+		/*
+		 * an ugly way to do all the prop accounting around
+		 * the page bits and mapping tags
+		 */
+		set_page_writeback(pages[0]);
+		end_page_writeback(pages[0]);
+		did_inline = 1;
+	}
+	if (end_pos > isize) {
+		i_size_write(inode, end_pos);
+		if (did_inline)
+			BTRFS_I(inode)->disk_i_size = end_pos;
+		btrfs_update_inode(trans, root, inode);
+	}
+failed:
+	err = btrfs_end_transaction(trans, root);
+out_unlock:
+	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
+	return err;
+}
+
+/*
+ * this drops all the extents in the cache that intersect the range
+ * [start, end].  Existing extents are split as required.
+ */
+int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+			    int skip_pinned)
+{
+	struct extent_map *em;
+	struct extent_map *split = NULL;
+	struct extent_map *split2 = NULL;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	u64 len = end - start + 1;
+	int ret;
+	int testend = 1;
+	unsigned long flags;
+
+	WARN_ON(end < start);
+	if (end == (u64)-1) {
+		len = (u64)-1;
+		testend = 0;
+	}
+	while(1) {
+		if (!split)
+			split = alloc_extent_map(GFP_NOFS);
+		if (!split2)
+			split2 = alloc_extent_map(GFP_NOFS);
+
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, start, len);
+		if (!em) {
+			spin_unlock(&em_tree->lock);
+			break;
+		}
+		flags = em->flags;
+		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+			spin_unlock(&em_tree->lock);
+			if (em->start <= start &&
+			    (!testend || em->start + em->len >= start + len)) {
+				free_extent_map(em);
+				break;
+			}
+			if (start < em->start) {
+				len = em->start - start;
+			} else {
+				len = start + len - (em->start + em->len);
+				start = em->start + em->len;
+			}
+			free_extent_map(em);
+			continue;
+		}
+		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+		remove_extent_mapping(em_tree, em);
+
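+		/*
+		 * the dropped range may only cover part of this mapping;
+		 * re-insert the surviving front piece (before start) and/or
+		 * tail piece (past start + len) as new extent_maps below.
+		 */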
+		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+		    em->start < start) {
+			split->start = em->start;
+			split->len = start - em->start;
+			split->block_start = em->block_start;
+			split->bdev = em->bdev;
+			split->flags = flags;
+			ret = add_extent_mapping(em_tree, split);
+			BUG_ON(ret);
+			free_extent_map(split);
+			split = split2;
+			split2 = NULL;
+		}
+		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+		    testend && em->start + em->len > start + len) {
+			u64 diff = start + len - em->start;
+
+			split->start = start + len;
+			split->len = em->start + em->len - (start + len);
+			split->bdev = em->bdev;
+			split->flags = flags;
+
+			split->block_start = em->block_start + diff;
+
+			ret = add_extent_mapping(em_tree, split);
+			BUG_ON(ret);
+			free_extent_map(split);
+			split = NULL;
+		}
+		spin_unlock(&em_tree->lock);
+
+		/* once for us */
+		free_extent_map(em);
+		/* once for the tree */
+		free_extent_map(em);
+	}
+	if (split)
+		free_extent_map(split);
+	if (split2)
+		free_extent_map(split2);
+	return 0;
+}
+
+int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
+{
+	return 0;
+#if 0
+	struct btrfs_path *path;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *extent;
+	u64 last_offset = 0;
+	int nritems;
+	int slot;
+	int found_type;
+	int ret;
+	int err = 0;
+	u64 extent_end = 0;
+
+	path = btrfs_alloc_path();
+	ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
+				       last_offset, 0);
+	while(1) {
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret)
+				goto out;
+			nritems = btrfs_header_nritems(path->nodes[0]);
+		}
+		slot = path->slots[0];
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (found_key.objectid != inode->i_ino)
+			break;
+		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
+			goto out;
+
+		if (found_key.offset < last_offset) {
+			WARN_ON(1);
+			btrfs_print_leaf(root, leaf);
+			printk("inode %lu found offset %Lu expected %Lu\n",
+			       inode->i_ino, found_key.offset, last_offset);
+			err = 1;
+			goto out;
+		}
+		extent = btrfs_item_ptr(leaf, slot,
+					struct btrfs_file_extent_item);
+		found_type = btrfs_file_extent_type(leaf, extent);
+		if (found_type == BTRFS_FILE_EXTENT_REG) {
+			extent_end = found_key.offset +
+			     btrfs_file_extent_num_bytes(leaf, extent);
+		} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+			struct btrfs_item *item;
+			item = btrfs_item_nr(leaf, slot);
+			extent_end = found_key.offset +
+			     btrfs_file_extent_inline_len(leaf, item);
+			extent_end = (extent_end + root->sectorsize - 1) &
+				~((u64)root->sectorsize -1 );
+		}
+		last_offset = extent_end;
+		path->slots[0]++;
+	}
+	if (0 && last_offset < inode->i_size) {
+		WARN_ON(1);
+		btrfs_print_leaf(root, leaf);
+		printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino,
+		       last_offset, inode->i_size);
+		err = 1;
+
+	}
+out:
+	btrfs_free_path(path);
+	return err;
+#endif
+}
+
+/*
+ * this is very complex, but the basic idea is to drop all extents
+ * in the range start - end.  hint_byte is filled in with a byte number
+ * that would be a good hint to the block allocator for this file.
+ *
+ * If an extent intersects the range but is not entirely inside the range
+ * it is either truncated or split.  Anything entirely inside the range
+ * is deleted from the tree.
+ *
+ * inline_limit is used to tell this code which offsets in the file to keep
+ * if they contain inline extents.
+ */
+int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct inode *inode,
+		       u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
+{
+	u64 extent_end = 0;
+	u64 search_start = start;
+	u64 leaf_start;
+	u64 root_gen;
+	u64 root_owner;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *extent;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item old;
+	int keep;
+	int slot;
+	int bookend;
+	int found_type;
+	int found_extent;
+	int found_inline;
+	int recow;
+	int ret;
+
+	btrfs_drop_extent_cache(inode, start, end - 1, 0);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	while(1) {
+		recow = 0;
+		btrfs_release_path(root, path);
+		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+					       search_start, -1);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			if (path->slots[0] == 0) {
+				ret = 0;
+				goto out;
+			}
+			path->slots[0]--;
+		}
+next_slot:
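+		/*
+		 * examine the item at path->slots[0] and decide whether the
+		 * file extent it describes must be truncated, deleted
+		 * entirely, or split with a bookend extent for the tail
+		 * that survives past 'end'.
+		 */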
+		keep = 0;
+		bookend = 0;
+		found_extent = 0;
+		found_inline = 0;
+		leaf_start = 0;
+		root_gen = 0;
+		root_owner = 0;
+		extent = NULL;
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		ret = 0;
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
+		    key.offset >= end) {
+			goto out;
+		}
+		if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
+		    key.objectid != inode->i_ino) {
+			goto out;
+		}
+		if (recow) {
+			search_start = key.offset;
+			continue;
+		}
+		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
+			extent = btrfs_item_ptr(leaf, slot,
+						struct btrfs_file_extent_item);
+			found_type = btrfs_file_extent_type(leaf, extent);
+			if (found_type == BTRFS_FILE_EXTENT_REG) {
+				extent_end =
+				     btrfs_file_extent_disk_bytenr(leaf,
+								   extent);
+				if (extent_end)
+					*hint_byte = extent_end;
+
+				extent_end = key.offset +
+				     btrfs_file_extent_num_bytes(leaf, extent);
+				found_extent = 1;
+			} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+				struct btrfs_item *item;
+				item = btrfs_item_nr(leaf, slot);
+				found_inline = 1;
+				extent_end = key.offset +
+				     btrfs_file_extent_inline_len(leaf, item);
+			}
+		} else {
+			extent_end = search_start;
+		}
+
+		/* we found nothing we can drop */
+		if ((!found_extent && !found_inline) ||
+		    search_start >= extent_end) {
+			int nextret;
+			u32 nritems;
+			nritems = btrfs_header_nritems(leaf);
+			if (slot >= nritems - 1) {
+				nextret = btrfs_next_leaf(root, path);
+				if (nextret)
+					goto out;
+				recow = 1;
+			} else {
+				path->slots[0]++;
+			}
+			goto next_slot;
+		}
+
+		if (found_inline) {
+			u64 mask = root->sectorsize - 1;
+			search_start = (extent_end + mask) & ~mask;
+		} else
+			search_start = extent_end;
+		if (end <= extent_end && start >= key.offset && found_inline) {
+			*hint_byte = EXTENT_MAP_INLINE;
+			goto out;
+		}
+
+		if (found_extent) {
+			read_extent_buffer(leaf, &old, (unsigned long)extent,
+					   sizeof(old));
+			root_gen = btrfs_header_generation(leaf);
+			root_owner = btrfs_header_owner(leaf);
+			leaf_start = leaf->start;
+		}
+
+		if (end < extent_end && end >= key.offset) {
+			bookend = 1;
+			if (found_inline && start <= key.offset)
+				keep = 1;
+		}
+		/* truncate existing extent */
+		if (start > key.offset) {
+			u64 new_num;
+			u64 old_num;
+			keep = 1;
+			WARN_ON(start & (root->sectorsize - 1));
+			if (found_extent) {
+				new_num = start - key.offset;
+				old_num = btrfs_file_extent_num_bytes(leaf,
+								      extent);
+				*hint_byte =
+					btrfs_file_extent_disk_bytenr(leaf,
+								      extent);
+				if (btrfs_file_extent_disk_bytenr(leaf,
+								  extent)) {
+					inode_sub_bytes(inode, old_num -
+							new_num);
+				}
+				btrfs_set_file_extent_num_bytes(leaf, extent,
+								new_num);
+				btrfs_mark_buffer_dirty(leaf);
+			} else if (key.offset < inline_limit &&
+				   (end > extent_end) &&
+				   (inline_limit < extent_end)) {
+				u32 new_size;
+				new_size = btrfs_file_extent_calc_inline_size(
+						   inline_limit - key.offset);
+				inode_sub_bytes(inode, extent_end -
+						inline_limit);
+				btrfs_truncate_item(trans, root, path,
+						    new_size, 1);
+			}
+		}
+		/* delete the entire extent */
+		if (!keep) {
+			if (found_inline)
+				inode_sub_bytes(inode, extent_end -
+						key.offset);
+			ret = btrfs_del_item(trans, root, path);
+			/* TODO update progress marker and return */
+			BUG_ON(ret);
+			extent = NULL;
+			btrfs_release_path(root, path);
+			/* the extent will be freed later */
+		}
+		if (bookend && found_inline && start <= key.offset) {
+			u32 new_size;
+			new_size = btrfs_file_extent_calc_inline_size(
+						   extent_end - end);
+			inode_sub_bytes(inode, end - key.offset);
+			ret = btrfs_truncate_item(trans, root, path,
+						  new_size, 0);
+			BUG_ON(ret);
+		}
+		/* create bookend, splitting the extent in two */
+		if (bookend && found_extent) {
+			u64 disk_bytenr;
+			struct btrfs_key ins;
+			ins.objectid = inode->i_ino;
+			ins.offset = end;
+			btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
+			btrfs_release_path(root, path);
+			ret = btrfs_insert_empty_item(trans, root, path, &ins,
+						      sizeof(*extent));
+			BUG_ON(ret);
+
+			leaf = path->nodes[0];
+			extent = btrfs_item_ptr(leaf, path->slots[0],
+						struct btrfs_file_extent_item);
+			write_extent_buffer(leaf, &old,
+					    (unsigned long)extent, sizeof(old));
+
+			btrfs_set_file_extent_offset(leaf, extent,
+				    le64_to_cpu(old.offset) + end - key.offset);
+			WARN_ON(le64_to_cpu(old.num_bytes) <
+				(extent_end - end));
+			btrfs_set_file_extent_num_bytes(leaf, extent,
+							extent_end - end);
+			btrfs_set_file_extent_type(leaf, extent,
+						   BTRFS_FILE_EXTENT_REG);
+
+			btrfs_mark_buffer_dirty(path->nodes[0]);
+
+			disk_bytenr = le64_to_cpu(old.disk_bytenr);
+			if (disk_bytenr != 0) {
+				ret = btrfs_inc_extent_ref(trans, root,
+						disk_bytenr,
+						le64_to_cpu(old.disk_num_bytes),
+						leaf->start,
+						root->root_key.objectid,
+						trans->transid, ins.objectid);
+				BUG_ON(ret);
+			}
+			btrfs_release_path(root, path);
+			if (disk_bytenr != 0) {
+				inode_add_bytes(inode, extent_end - end);
+			}
+		}
+
+		if (found_extent && !keep) {
+			u64 disk_bytenr = le64_to_cpu(old.disk_bytenr);
+
+			if (disk_bytenr != 0) {
+				inode_sub_bytes(inode,
+						le64_to_cpu(old.num_bytes));
+				ret = btrfs_free_extent(trans, root,
+						disk_bytenr,
+						le64_to_cpu(old.disk_num_bytes),
+						leaf_start, root_owner,
+						root_gen, key.objectid, 0);
+				BUG_ON(ret);
+				*hint_byte = disk_bytenr;
+			}
+		}
+
+		if (search_start >= end) {
+			ret = 0;
+			goto out;
+		}
+	}
+out:
+	btrfs_free_path(path);
+	btrfs_check_file(root, inode);
+	return ret;
+}
+
+/*
+ * this gets pages into the page cache and locks them down.  It also properly
+ * waits for data=ordered extents to finish before allowing the pages to be
+ * modified.
+ */
+static int noinline prepare_pages(struct btrfs_root *root, struct file *file,
+			 struct page **pages, size_t num_pages,
+			 loff_t pos, unsigned long first_index,
+			 unsigned long last_index, size_t write_bytes)
+{
+	int i;
+	unsigned long index = pos >> PAGE_CACHE_SHIFT;
+	struct inode *inode = fdentry(file)->d_inode;
+	int err = 0;
+	u64 start_pos;
+	u64 last_pos;
+
+	start_pos = pos & ~((u64)root->sectorsize - 1);
+	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
+
+	memset(pages, 0, num_pages * sizeof(struct page *));
+again:
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = grab_cache_page(inode->i_mapping, index + i);
+		if (!pages[i]) {
+			err = -ENOMEM;
+			BUG_ON(1);
+		}
+		wait_on_page_writeback(pages[i]);
+	}
+	if (start_pos < inode->i_size) {
+		struct btrfs_ordered_extent *ordered;
+		lock_extent(&BTRFS_I(inode)->io_tree,
+			    start_pos, last_pos - 1, GFP_NOFS);
+		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
+		if (ordered &&
+		    ordered->file_offset + ordered->len > start_pos &&
+		    ordered->file_offset < last_pos) {
+			btrfs_put_ordered_extent(ordered);
+			unlock_extent(&BTRFS_I(inode)->io_tree,
+				      start_pos, last_pos - 1, GFP_NOFS);
+			for (i = 0; i < num_pages; i++) {
+				unlock_page(pages[i]);
+				page_cache_release(pages[i]);
+			}
+			btrfs_wait_ordered_range(inode, start_pos,
+						 last_pos - start_pos);
+			goto again;
+		}
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+
+		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
+				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
+				  GFP_NOFS);
+		unlock_extent(&BTRFS_I(inode)->io_tree,
+			      start_pos, last_pos - 1, GFP_NOFS);
+	}
+	for (i = 0; i < num_pages; i++) {
+		clear_page_dirty_for_io(pages[i]);
+		set_page_extent_mapped(pages[i]);
+		WARN_ON(!PageLocked(pages[i]));
+	}
+	return 0;
+}
+
+static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	loff_t pos;
+	loff_t start_pos;
+	ssize_t num_written = 0;
+	ssize_t err = 0;
+	int ret = 0;
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct page **pages = NULL;
+	int nrptrs;
+	struct page *pinned[2];
+	unsigned long first_index;
+	unsigned long last_index;
+	int will_write;
+
+	will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
+		      (file->f_flags & O_DIRECT));
+
+	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
+		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
+	pinned[0] = NULL;
+	pinned[1] = NULL;
+
+	pos = *ppos;
+	start_pos = pos;
+
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+	current->backing_dev_info = inode->i_mapping->backing_dev_info;
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out_nolock;
+	if (count == 0)
+		goto out_nolock;
+
+	err = file_remove_suid(file);
+	if (err)
+		goto out_nolock;
+	file_update_time(file);
+
+	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+
+	mutex_lock(&inode->i_mutex);
+	first_index = pos >> PAGE_CACHE_SHIFT;
+	last_index = (pos + count) >> PAGE_CACHE_SHIFT;
+
+	/*
+	 * if this is a nodatasum mount, force summing off for the inode
+	 * all the time.  That way a later mount with summing on won't
+	 * get confused
+	 */
+	if (btrfs_test_opt(root, NODATASUM))
+		btrfs_set_flag(inode, NODATASUM);
+
+	/*
+	 * there are lots of better ways to do this, but this code
+	 * makes sure the first and last page in the file range are
+	 * up to date and ready for cow
+	 */
+	if ((pos & (PAGE_CACHE_SIZE - 1))) {
+		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
+		if (!PageUptodate(pinned[0])) {
+			ret = btrfs_readpage(NULL, pinned[0]);
+			BUG_ON(ret);
+			wait_on_page_locked(pinned[0]);
+		} else {
+			unlock_page(pinned[0]);
+		}
+	}
+	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
+		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
+		if (!PageUptodate(pinned[1])) {
+			ret = btrfs_readpage(NULL, pinned[1]);
+			BUG_ON(ret);
+			wait_on_page_locked(pinned[1]);
+		} else {
+			unlock_page(pinned[1]);
+		}
+	}
+
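+	/*
+	 * main write loop: work on batches of up to nrptrs pages at a time.
+	 * Each pass checks for free space, locks and prepares the pages,
+	 * copies the user data in, and then marks the range dirty/delalloc
+	 * (or inline) via dirty_and_release_pages().
+	 */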
+	while(count > 0) {
+		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+		size_t write_bytes = min(count, nrptrs *
+					(size_t)PAGE_CACHE_SIZE -
+					 offset);
+		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
+					PAGE_CACHE_SHIFT;
+
+		WARN_ON(num_pages > nrptrs);
+		memset(pages, 0, nrptrs * sizeof(struct page *));
+
+		ret = btrfs_check_free_space(root, write_bytes, 0);
+		if (ret)
+			goto out;
+
+		ret = prepare_pages(root, file, pages, num_pages,
+				    pos, first_index, last_index,
+				    write_bytes);
+		if (ret)
+			goto out;
+
+		ret = btrfs_copy_from_user(pos, num_pages,
+					   write_bytes, pages, buf);
+		if (ret) {
+			btrfs_drop_pages(pages, num_pages);
+			goto out;
+		}
+
+		ret = dirty_and_release_pages(NULL, root, file, pages,
+					      num_pages, pos, write_bytes);
+		btrfs_drop_pages(pages, num_pages);
+		if (ret)
+			goto out;
+
+		if (will_write) {
+			btrfs_fdatawrite_range(inode->i_mapping, pos,
+					       pos + write_bytes - 1,
+					       WB_SYNC_NONE);
+		} else {
+			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+							   num_pages);
+			if (num_pages <
+			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+				btrfs_btree_balance_dirty(root, 1);
+			btrfs_throttle(root);
+		}
+
+		buf += write_bytes;
+		count -= write_bytes;
+		pos += write_bytes;
+		num_written += write_bytes;
+
+		cond_resched();
+	}
+out:
+	mutex_unlock(&inode->i_mutex);
+
+out_nolock:
+	kfree(pages);
+	if (pinned[0])
+		page_cache_release(pinned[0]);
+	if (pinned[1])
+		page_cache_release(pinned[1]);
+	*ppos = pos;
+
+	if (num_written > 0 && will_write) {
+		struct btrfs_trans_handle *trans;
+
+		err = btrfs_wait_ordered_range(inode, start_pos, num_written);
+		if (err)
+			num_written = err;
+
+		if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+			trans = btrfs_start_transaction(root, 1);
+			ret = btrfs_log_dentry_safe(trans, root,
+						    file->f_dentry);
+			if (ret == 0) {
+				btrfs_sync_log(trans, root);
+				btrfs_end_transaction(trans, root);
+			} else {
+				btrfs_commit_transaction(trans, root);
+			}
+		}
+		if (file->f_flags & O_DIRECT) {
+			invalidate_mapping_pages(inode->i_mapping,
+			      start_pos >> PAGE_CACHE_SHIFT,
+			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
+		}
+	}
+	current->backing_dev_info = NULL;
+	return num_written ? num_written : err;
+}
+
+int btrfs_release_file(struct inode * inode, struct file * filp)
+{
+	if (filp->private_data)
+		btrfs_ioctl_trans_end(filp);
+	return 0;
+}
+
+/*
+ * fsync call for both files and directories.  This logs the inode into
+ * the tree log instead of forcing full commits whenever possible.
+ *
+ * It needs to call filemap_fdatawait so that all the ordered extent updates
+ * in the metadata btree are up to date for copying to the log.
+ *
+ * It drops the inode mutex before doing the tree log commit.  This is an
+ * important optimization for directories because holding the mutex prevents
+ * new operations on the dir while we write to disk.
+ */
+int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+	struct btrfs_trans_handle *trans;
+
+	/*
+	 * check the transaction that last modified this inode
+	 * and see if it's already been committed
+	 */
+	if (!BTRFS_I(inode)->last_trans)
+		goto out;
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (BTRFS_I(inode)->last_trans <=
+	    root->fs_info->last_trans_committed) {
+		BTRFS_I(inode)->last_trans = 0;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		goto out;
+	}
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	root->fs_info->tree_log_batch++;
+	filemap_fdatawait(inode->i_mapping);
+	root->fs_info->tree_log_batch++;
+
+	/*
+	 * ok, we haven't committed the transaction yet, let's do a commit
+	 */
+	if (file->private_data)
+		btrfs_ioctl_trans_end(file);
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = btrfs_log_dentry_safe(trans, root, file->f_dentry);
+	if (ret < 0) {
+		goto out;
+	}
+
+	/* we've logged all the items and now have a consistent
+	 * version of the file in the log.  It is possible that
+	 * someone will come in and modify the file, but that's
+	 * fine because the log is consistent on disk, and we
+	 * have references to all of the file's extents
+	 *
+	 * It is possible that someone will come in and log the
+	 * file again, but that will end up using the synchronization
+	 * inside btrfs_sync_log to keep things safe.
+	 */
+	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+
+	if (ret > 0) {
+		ret = btrfs_commit_transaction(trans, root);
+	} else {
+		btrfs_sync_log(trans, root);
+		ret = btrfs_end_transaction(trans, root);
+	}
+	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+out:
+	return ret > 0 ? -EIO : ret;
+}
+
+static struct vm_operations_struct btrfs_file_vm_ops = {
+	.fault		= filemap_fault,
+	.page_mkwrite	= btrfs_page_mkwrite,
+};
+
+static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &btrfs_file_vm_ops;
+	file_accessed(filp);
+	return 0;
+}
+
+struct file_operations btrfs_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.aio_read       = generic_file_aio_read,
+	.splice_read	= generic_file_splice_read,
+	.write		= btrfs_file_write,
+	.mmap		= btrfs_file_mmap,
+	.open		= generic_file_open,
+	.release	= btrfs_release_file,
+	.fsync		= btrfs_sync_file,
+	.unlocked_ioctl	= btrfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= btrfs_ioctl,
+#endif
+};

+ 449 - 0
fs/btrfs/free-space-cache.c

@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2008 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+
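+/*
+ * free space in each block group is tracked in two rbtrees at once: one
+ * indexed by offset (free_space_offset) for merging neighbours and
+ * offset based lookups, and one indexed by size (free_space_bytes) so the
+ * allocator can quickly find a hole at least as big as it needs.  Every
+ * btrfs_free_space entry is linked into both trees.
+ */
+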
+static int tree_insert_offset(struct rb_root *root, u64 offset,
+			      struct rb_node *node)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct btrfs_free_space *info;
+
+	while (*p) {
+		parent = *p;
+		info = rb_entry(parent, struct btrfs_free_space, offset_index);
+
+		if (offset < info->offset)
+			p = &(*p)->rb_left;
+		else if (offset > info->offset)
+			p = &(*p)->rb_right;
+		else
+			return -EEXIST;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+
+	return 0;
+}
+
+static int tree_insert_bytes(struct rb_root *root, u64 bytes,
+			     struct rb_node *node)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct btrfs_free_space *info;
+
+	while (*p) {
+		parent = *p;
+		info = rb_entry(parent, struct btrfs_free_space, bytes_index);
+
+		if (bytes < info->bytes)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+
+	return 0;
+}
+
+/*
+ * searches the tree for the given offset.  If contains is set we will return
+ * the free space that contains the given offset.  If contains is not set we
+ * will return the free space that starts at or after the given offset and is
+ * at least bytes long.
+ */
+static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
+						   u64 offset, u64 bytes,
+						   int contains)
+{
+	struct rb_node *n = root->rb_node;
+	struct btrfs_free_space *entry, *ret = NULL;
+
+	while (n) {
+		entry = rb_entry(n, struct btrfs_free_space, offset_index);
+
+		if (offset < entry->offset) {
+			if (!contains &&
+			    (!ret || entry->offset < ret->offset) &&
+			    (bytes <= entry->bytes))
+				ret = entry;
+			n = n->rb_left;
+		} else if (offset > entry->offset) {
+			if ((entry->offset + entry->bytes - 1) >= offset &&
+			    bytes <= entry->bytes) {
+				ret = entry;
+				break;
+			}
+			n = n->rb_right;
+		} else {
+			if (bytes > entry->bytes) {
+				n = n->rb_right;
+				continue;
+			}
+			ret = entry;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * return a chunk at least bytes in size, as close to offset as we can get.
+ */
+static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
+						  u64 offset, u64 bytes)
+{
+	struct rb_node *n = root->rb_node;
+	struct btrfs_free_space *entry, *ret = NULL;
+
+	while (n) {
+		entry = rb_entry(n, struct btrfs_free_space, bytes_index);
+
+		if (bytes < entry->bytes) {
+			/*
+			 * We prefer to get a hole size as close to the size we
+			 * are asking for so we don't take small slivers out of
+			 * huge holes, but we also want to get as close to the
+			 * offset as possible so we don't have a whole lot of
+			 * fragmentation.
+			 */
+			if (offset <= entry->offset) {
+				if (!ret)
+					ret = entry;
+				else if (entry->bytes < ret->bytes)
+					ret = entry;
+				else if (entry->offset < ret->offset)
+					ret = entry;
+			}
+			n = n->rb_left;
+		} else if (bytes > entry->bytes) {
+			n = n->rb_right;
+		} else {
+			/*
+			 * Ok, we may have multiple chunks of the wanted size.
+			 * We don't want to take the first one we find; we
+			 * want the one closest to our given offset, so keep
+			 * searching in case there's a better match.
+			 */
+			n = n->rb_right;
+			if (offset > entry->offset)
+				continue;
+			else if (!ret || entry->offset < ret->offset)
+				ret = entry;
+		}
+	}
+
+	return ret;
+}
+
+static void unlink_free_space(struct btrfs_block_group_cache *block_group,
+			      struct btrfs_free_space *info)
+{
+	rb_erase(&info->offset_index, &block_group->free_space_offset);
+	rb_erase(&info->bytes_index, &block_group->free_space_bytes);
+}
+
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+			   struct btrfs_free_space *info)
+{
+	int ret = 0;
+
+	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
+				 &info->offset_index);
+	if (ret)
+		return ret;
+
+	ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
+				&info->bytes_index);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+			 u64 offset, u64 bytes)
+{
+	struct btrfs_free_space *right_info;
+	struct btrfs_free_space *left_info;
+	struct btrfs_free_space *info = NULL;
+	struct btrfs_free_space *alloc_info;
+	int ret = 0;
+
+	alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+	if (!alloc_info)
+		return -ENOMEM;
+
+	/*
+	 * first we want to see if there is free space adjacent to the range
+	 * we are adding.  If there is, remove that struct and add a new one
+	 * to cover the entire range.
+	 */
+	spin_lock(&block_group->lock);
+
+	right_info = tree_search_offset(&block_group->free_space_offset,
+					offset+bytes, 0, 1);
+	left_info = tree_search_offset(&block_group->free_space_offset,
+				       offset-1, 0, 1);
+
+	if (right_info && right_info->offset == offset+bytes) {
+		unlink_free_space(block_group, right_info);
+		info = right_info;
+		info->offset = offset;
+		info->bytes += bytes;
+	} else if (right_info && right_info->offset != offset+bytes) {
+		printk(KERN_ERR "adding space in the middle of an existing "
+		       "free space area. existing: offset=%Lu, bytes=%Lu. "
+		       "new: offset=%Lu, bytes=%Lu\n", right_info->offset,
+		       right_info->bytes, offset, bytes);
+		BUG();
+	}
+
+	if (left_info) {
+		unlink_free_space(block_group, left_info);
+
+		if (unlikely((left_info->offset + left_info->bytes) !=
+			     offset)) {
+			printk(KERN_ERR "free space to the left of new free "
+			       "space isn't quite right. existing: offset=%Lu,"
+			       " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n",
+			       left_info->offset, left_info->bytes, offset,
+			       bytes);
+			BUG();
+		}
+
+		if (info) {
+			info->offset = left_info->offset;
+			info->bytes += left_info->bytes;
+			kfree(left_info);
+		} else {
+			info = left_info;
+			info->bytes += bytes;
+		}
+	}
+
+	if (info) {
+		ret = link_free_space(block_group, info);
+		if (!ret)
+			info = NULL;
+		goto out;
+	}
+
+	info = alloc_info;
+	alloc_info = NULL;
+	info->offset = offset;
+	info->bytes = bytes;
+
+	ret = link_free_space(block_group, info);
+	if (ret)
+		kfree(info);
+out:
+	spin_unlock(&block_group->lock);
+	if (ret) {
+		printk(KERN_ERR "btrfs: unable to add free space: %d\n", ret);
+		if (ret == -EEXIST)
+			BUG();
+	}
+
+	if (alloc_info)
+		kfree(alloc_info);
+
+	return ret;
+}
+
+int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+			    u64 offset, u64 bytes)
+{
+	struct btrfs_free_space *info;
+	int ret = 0;
+
+	spin_lock(&block_group->lock);
+	info = tree_search_offset(&block_group->free_space_offset, offset, 0,
+				  1);
+
+	if (info && info->offset == offset) {
+		if (info->bytes < bytes) {
+			printk(KERN_ERR "Found free space at %Lu, size %Lu, "
+			       "trying to use %Lu\n",
+			       info->offset, info->bytes, bytes);
+			WARN_ON(1);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		unlink_free_space(block_group, info);
+
+		if (info->bytes == bytes) {
+			kfree(info);
+			goto out;
+		}
+
+		info->offset += bytes;
+		info->bytes -= bytes;
+
+		ret = link_free_space(block_group, info);
+		BUG_ON(ret);
+	} else if (info && info->offset < offset &&
+		   info->offset + info->bytes >= offset + bytes) {
+		u64 old_start = info->offset;
+		/*
+		 * we're freeing space in the middle of the info;
+		 * this can happen during tree log replay
+		 *
+		 * first unlink the old info and then
+		 * insert it again after the hole we're creating
+		 */
+		unlink_free_space(block_group, info);
+		if (offset + bytes < info->offset + info->bytes) {
+			u64 old_end = info->offset + info->bytes;
+
+			info->offset = offset + bytes;
+			info->bytes = old_end - info->offset;
+			ret = link_free_space(block_group, info);
+			BUG_ON(ret);
+		} else {
+			/* the hole we're creating ends at the end
+			 * of the info struct, just free the info
+			 */
+			kfree(info);
+		}
+
+		/* step two, insert a new info struct to cover anything
+		 * before the hole
+		 */
+		spin_unlock(&block_group->lock);
+		ret = btrfs_add_free_space(block_group, old_start,
+					   offset - old_start);
+		BUG_ON(ret);
+		goto out_nolock;
+	} else {
+		WARN_ON(1);
+	}
+out:
+	spin_unlock(&block_group->lock);
+out_nolock:
+	return ret;
+}
+
+void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+			   u64 bytes)
+{
+	struct btrfs_free_space *info;
+	struct rb_node *n;
+	int count = 0;
+
+	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
+		info = rb_entry(n, struct btrfs_free_space, offset_index);
+		if (info->bytes >= bytes)
+			count++;
+		//printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset,
+		//       info->bytes);
+	}
+	printk(KERN_INFO "%d blocks of free space at or bigger than %Lu bytes\n",
+	       count, bytes);
+}
+
+u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_free_space *info;
+	struct rb_node *n;
+	u64 ret = 0;
+
+	for (n = rb_first(&block_group->free_space_offset); n;
+	     n = rb_next(n)) {
+		info = rb_entry(n, struct btrfs_free_space, offset_index);
+		ret += info->bytes;
+	}
+
+	return ret;
+}
+
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_free_space *info;
+	struct rb_node *node;
+
+	spin_lock(&block_group->lock);
+	while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
+		info = rb_entry(node, struct btrfs_free_space, bytes_index);
+		unlink_free_space(block_group, info);
+		kfree(info);
+		if (need_resched()) {
+			spin_unlock(&block_group->lock);
+			cond_resched();
+			spin_lock(&block_group->lock);
+		}
+	}
+	spin_unlock(&block_group->lock);
+}
+
+struct btrfs_free_space *btrfs_find_free_space_offset(
+				struct btrfs_block_group_cache *block_group,
+				u64 offset, u64 bytes)
+{
+	struct btrfs_free_space *ret;
+
+	spin_lock(&block_group->lock);
+	ret = tree_search_offset(&block_group->free_space_offset, offset,
+				 bytes, 0);
+	spin_unlock(&block_group->lock);
+
+	return ret;
+}
+
+struct btrfs_free_space *btrfs_find_free_space_bytes(
+				struct btrfs_block_group_cache *block_group,
+				u64 offset, u64 bytes)
+{
+	struct btrfs_free_space *ret;
+
+	spin_lock(&block_group->lock);
+
+	ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes);
+	spin_unlock(&block_group->lock);
+
+	return ret;
+}
+
+struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
+					       *block_group, u64 offset,
+					       u64 bytes)
+{
+	struct btrfs_free_space *ret;
+
+	spin_lock(&block_group->lock);
+	ret = tree_search_offset(&block_group->free_space_offset, offset,
+				 bytes, 0);
+	if (!ret)
+		ret = tree_search_bytes(&block_group->free_space_bytes,
+					offset, bytes);
+
+	spin_unlock(&block_group->lock);
+
+	return ret;
+}

+ 27 - 0
fs/btrfs/hash.h

@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __HASH__
+#define __HASH__
+
+#include "crc32c.h"
+static inline u64 btrfs_name_hash(const char *name, int len)
+{
+	return btrfs_crc32c((u32)~1, name, len);
+}
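+
+/*
+ * illustrative sketch (not part of this header): directory items are keyed
+ * by the crc32c hash of the entry name, roughly:
+ *
+ *	key.objectid = dir->i_ino;
+ *	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
+ *	key.offset = btrfs_name_hash(name, name_len);
+ */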
+#endif

+ 206 - 0
fs/btrfs/inode-item.c

@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+
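+/*
+ * inode back references are stored as BTRFS_INODE_REF_KEY items: each
+ * record is a btrfs_inode_ref header immediately followed by the link
+ * name, and several records may be packed back to back inside a single
+ * item.  The helpers below search, add and remove individual records.
+ */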
+int find_name_in_backref(struct btrfs_path *path, const char *name,
+			 int name_len, struct btrfs_inode_ref **ref_ret)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_inode_ref *ref;
+	unsigned long ptr;
+	unsigned long name_ptr;
+	u32 item_size;
+	u32 cur_offset = 0;
+	int len;
+
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+	while (cur_offset < item_size) {
+		ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+		len = btrfs_inode_ref_name_len(leaf, ref);
+		name_ptr = (unsigned long)(ref + 1);
+		cur_offset += len + sizeof(*ref);
+		if (len != name_len)
+			continue;
+		if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
+			*ref_ret = ref;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   const char *name, int name_len,
+			   u64 inode_objectid, u64 ref_objectid, u64 *index)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_inode_ref *ref;
+	struct extent_buffer *leaf;
+	unsigned long ptr;
+	unsigned long item_start;
+	u32 item_size;
+	u32 sub_item_len;
+	int ret;
+	int del_len = name_len + sizeof(*ref);
+
+	key.objectid = inode_objectid;
+	key.offset = ref_objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	} else if (ret < 0) {
+		goto out;
+	}
+	if (!find_name_in_backref(path, name, name_len, &ref)) {
+		ret = -ENOENT;
+		goto out;
+	}
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+
+	if (index)
+		*index = btrfs_inode_ref_index(leaf, ref);
+
+	if (del_len == item_size) {
+		ret = btrfs_del_item(trans, root, path);
+		goto out;
+	}
+	ptr = (unsigned long)ref;
+	sub_item_len = name_len + sizeof(*ref);
+	item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+	memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
+			      item_size - (ptr + sub_item_len - item_start));
+	ret = btrfs_truncate_item(trans, root, path,
+				  item_size - sub_item_len, 1);
+	BUG_ON(ret);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   const char *name, int name_len,
+			   u64 inode_objectid, u64 ref_objectid, u64 index)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_inode_ref *ref;
+	unsigned long ptr;
+	int ret;
+	int ins_len = name_len + sizeof(*ref);
+
+	key.objectid = inode_objectid;
+	key.offset = ref_objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      ins_len);
+	if (ret == -EEXIST) {
+		u32 old_size;
+
+		if (find_name_in_backref(path, name, name_len, &ref))
+			goto out;
+
+		old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
+		ret = btrfs_extend_item(trans, root, path, ins_len);
+		BUG_ON(ret);
+		ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				     struct btrfs_inode_ref);
+		ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
+		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+		btrfs_set_inode_ref_index(path->nodes[0], ref, index);
+		ptr = (unsigned long)(ref + 1);
+		ret = 0;
+	} else if (ret < 0) {
+		goto out;
+	} else {
+		ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				     struct btrfs_inode_ref);
+		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+		btrfs_set_inode_ref_index(path->nodes[0], ref, index);
+		ptr = (unsigned long)(ref + 1);
+	}
+	write_extent_buffer(path->nodes[0], name, ptr, name_len);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_path *path, u64 objectid)
+{
+	struct btrfs_key key;
+	int ret;
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      sizeof(struct btrfs_inode_item));
+	if (ret == 0 && objectid > root->highest_inode)
+		root->highest_inode = objectid;
+	return ret;
+}
+
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
+		       *root, struct btrfs_path *path,
+		       struct btrfs_key *location, int mod)
+{
+	int ins_len = mod < 0 ? -1 : 0;
+	int cow = mod != 0;
+	int ret;
+	int slot;
+	struct extent_buffer *leaf;
+	struct btrfs_key found_key;
+
+	ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
+	if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY &&
+	    location->offset == (u64)-1 && path->slots[0] != 0) {
+		slot = path->slots[0] - 1;
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (found_key.objectid == location->objectid &&
+		    btrfs_key_type(&found_key) == btrfs_key_type(location)) {
+			path->slots[0]--;
+			return 0;
+		}
+	}
+	return ret;
+}

+ 145 - 0
fs/btrfs/inode-map.c

@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+
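+/*
+ * find the highest inode number currently allocated in this root by
+ * looking up the last key in front of BTRFS_LAST_FREE_OBJECTID, or
+ * return BTRFS_FIRST_FREE_OBJECTID when nothing is found.
+ */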
+int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct extent_buffer *l;
+	struct btrfs_key search_key;
+	struct btrfs_key found_key;
+	int slot;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
+	search_key.type = -1;
+	search_key.offset = (u64)-1;
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+	BUG_ON(ret == 0);
+	if (path->slots[0] > 0) {
+		slot = path->slots[0] - 1;
+		l = path->nodes[0];
+		btrfs_item_key_to_cpu(l, &found_key, slot);
+		*objectid = found_key.objectid;
+	} else {
+		*objectid = BTRFS_FIRST_FREE_OBJECTID;
+	}
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * walks the btree of allocated inodes and finds a hole.
+ */
+int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     u64 dirid, u64 *objectid)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+	int slot = 0;
+	u64 last_ino = 0;
+	int start_found;
+	struct extent_buffer *l;
+	struct btrfs_key search_key;
+	u64 search_start = dirid;
+
+	mutex_lock(&root->objectid_mutex);
+	if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
+	    root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
+		*objectid = ++root->last_inode_alloc;
+		mutex_unlock(&root->objectid_mutex);
+		return 0;
+	}
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID);
+	search_key.objectid = search_start;
+	search_key.type = 0;
+	search_key.offset = 0;
+
+	btrfs_init_path(path);
+	start_found = 0;
+	ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+
+	while (1) {
+		l = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(l)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto error;
+			if (!start_found) {
+				*objectid = search_start;
+				start_found = 1;
+				goto found;
+			}
+			*objectid = last_ino > search_start ?
+				last_ino : search_start;
+			goto found;
+		}
+		btrfs_item_key_to_cpu(l, &key, slot);
+		if (key.objectid >= search_start) {
+			if (start_found) {
+				if (last_ino < search_start)
+					last_ino = search_start;
+				if (key.objectid > last_ino) {
+					*objectid = last_ino;
+					goto found;
+				}
+			} else if (key.objectid > search_start) {
+				*objectid = search_start;
+				goto found;
+			}
+		}
+		if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
+			break;
+
+		start_found = 1;
+		last_ino = key.objectid + 1;
+		path->slots[0]++;
+	}
+	/* FIXME: -ENOSPC */
+	BUG_ON(1);
+found:
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
+	BUG_ON(*objectid < search_start);
+	mutex_unlock(&root->objectid_mutex);
+	return 0;
+error:
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
+	mutex_unlock(&root->objectid_mutex);
+	return ret;
+}

+ 3908 - 0
fs/btrfs/inode.c

@@ -0,0 +1,3908 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/mpage.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include <linux/compat.h>
+#include <linux/bit_spinlock.h>
+#include <linux/version.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "ioctl.h"
+#include "print-tree.h"
+#include "volumes.h"
+#include "ordered-data.h"
+#include "xattr.h"
+#include "compat.h"
+#include "tree-log.h"
+#include "ref-cache.h"
+
+struct btrfs_iget_args {
+	u64 ino;
+	struct btrfs_root *root;
+};
+
+static struct inode_operations btrfs_dir_inode_operations;
+static struct inode_operations btrfs_symlink_inode_operations;
+static struct inode_operations btrfs_dir_ro_inode_operations;
+static struct inode_operations btrfs_special_inode_operations;
+static struct inode_operations btrfs_file_inode_operations;
+static struct address_space_operations btrfs_aops;
+static struct address_space_operations btrfs_symlink_aops;
+static struct file_operations btrfs_dir_file_operations;
+static struct extent_io_ops btrfs_extent_io_ops;
+
+static struct kmem_cache *btrfs_inode_cachep;
+struct kmem_cache *btrfs_trans_handle_cachep;
+struct kmem_cache *btrfs_transaction_cachep;
+struct kmem_cache *btrfs_bit_radix_cachep;
+struct kmem_cache *btrfs_path_cachep;
+
+#define S_SHIFT 12
+static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
+};
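(Aside, not part of this commit: the table above is indexed with the S_IFMT bits of an inode mode shifted down by S_SHIFT, which collapses the sparse mode space into a small array.  A minimal sketch of how it is meant to be used, with an illustrative helper name:)

	/* illustrative sketch only: map a VFS i_mode to the on-disk
	 * BTRFS_FT_* directory entry type using the table above */
	static inline unsigned char btrfs_type_from_mode(umode_t mode)
	{
		return btrfs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
	}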
+
+static void btrfs_truncate(struct inode *inode);
+
+/*
+ * a very lame attempt at stopping writes when the FS is 85% full.  There
+ * are countless ways this is incorrect, but it is better than nothing.
+ */
+int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
+			   int for_del)
+{
+	u64 total;
+	u64 used;
+	u64 thresh;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
+	total = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	used = btrfs_super_bytes_used(&root->fs_info->super_copy);
+	if (for_del)
+		thresh = total * 90;
+	else
+		thresh = total * 85;
+
+	do_div(thresh, 100);
+
+	if (used + root->fs_info->delalloc_bytes + num_required > thresh)
+		ret = -ENOSPC;
+	spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
+	return ret;
+}
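(Aside, not part of this commit: the check above compares used + delalloc + requested bytes against 85% of the total bytes, or 90% for deletes, using 64-bit arithmetic.  A standalone sketch of the same test, with plain division standing in for the kernel's do_div():)

	/* illustrative sketch only: the same threshold test as
	 * btrfs_check_free_space(), written as a standalone helper */
	static int over_free_space_threshold(unsigned long long total,
					     unsigned long long used,
					     unsigned long long delalloc,
					     unsigned long long num_required,
					     int for_del)
	{
		unsigned long long thresh = total * (for_del ? 90 : 85) / 100;

		return used + delalloc + num_required > thresh;
	}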
+
+/*
+ * when extent_io.c finds a delayed allocation range in the file,
+ * the callbacks end up in this code.  The basic idea is to
+ * allocate extents on disk for the range, and create ordered data structs
+ * in ram to track those extents.
+ */
+static int cow_file_range(struct inode *inode, u64 start, u64 end)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	u64 alloc_hint = 0;
+	u64 num_bytes;
+	u64 cur_alloc_size;
+	u64 blocksize = root->sectorsize;
+	u64 orig_num_bytes;
+	struct btrfs_key ins;
+	struct extent_map *em;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	int ret = 0;
+
+	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(!trans);
+	btrfs_set_trans_block_group(trans, inode);
+
+	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
+	num_bytes = max(blocksize,  num_bytes);
+	orig_num_bytes = num_bytes;
+
+	if (alloc_hint == EXTENT_MAP_INLINE)
+		goto out;
+
+	BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
+	mutex_lock(&BTRFS_I(inode)->extent_mutex);
+	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
+	mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+
+	while(num_bytes > 0) {
+		cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
+		ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
+					   root->sectorsize, 0, alloc_hint,
+					   (u64)-1, &ins, 1);
+		if (ret) {
+			WARN_ON(1);
+			goto out;
+		}
+		em = alloc_extent_map(GFP_NOFS);
+		em->start = start;
+		em->len = ins.offset;
+		em->block_start = ins.objectid;
+		em->bdev = root->fs_info->fs_devices->latest_bdev;
+		mutex_lock(&BTRFS_I(inode)->extent_mutex);
+		set_bit(EXTENT_FLAG_PINNED, &em->flags);
+		while(1) {
+			spin_lock(&em_tree->lock);
+			ret = add_extent_mapping(em_tree, em);
+			spin_unlock(&em_tree->lock);
+			if (ret != -EEXIST) {
+				free_extent_map(em);
+				break;
+			}
+			btrfs_drop_extent_cache(inode, start,
+						start + ins.offset - 1, 0);
+		}
+		mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+
+		cur_alloc_size = ins.offset;
+		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
+					       ins.offset, 0);
+		BUG_ON(ret);
+		if (num_bytes < cur_alloc_size) {
+			printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
+			       cur_alloc_size);
+			break;
+		}
+		num_bytes -= cur_alloc_size;
+		alloc_hint = ins.objectid + ins.offset;
+		start += cur_alloc_size;
+	}
+out:
+	btrfs_end_transaction(trans, root);
+	return ret;
+}
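(Aside, not part of this commit: since end is inclusive, the length of the range is end - start + 1, and the num_bytes computation above rounds that length up to a whole number of sectors with a power-of-two mask.  A sketch of the same arithmetic:)

	/* illustrative sketch only: round an inclusive byte range up to a
	 * multiple of a power-of-two blocksize, as cow_file_range() does */
	static inline unsigned long long range_to_sectors(unsigned long long start,
							   unsigned long long end,
							   unsigned long long blocksize)
	{
		return (end - start + blocksize) & ~(blocksize - 1);
	}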
+
+/*
+ * called for nocow writeback.  This checks for snapshots or COW copies
+ * of the extents that exist in the file, and COWs the file as required.
+ *
+ * If no cow copies or snapshots exist, we write directly to the existing
+ * blocks on disk
+ */
+static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
+{
+	u64 extent_start;
+	u64 extent_end;
+	u64 bytenr;
+	u64 loops = 0;
+	u64 total_fs_bytes;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_trans_handle *trans;
+	struct extent_buffer *leaf;
+	int found_type;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *item;
+	int ret;
+	int err = 0;
+	struct btrfs_key found_key;
+
+	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(!trans);
+again:
+	ret = btrfs_lookup_file_extent(NULL, root, path,
+				       inode->i_ino, start, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
+	if (ret != 0) {
+		if (path->slots[0] == 0)
+			goto not_found;
+		path->slots[0]--;
+	}
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+			      struct btrfs_file_extent_item);
+
+	/* are we inside the extent that was found? */
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	found_type = btrfs_key_type(&found_key);
+	if (found_key.objectid != inode->i_ino ||
+	    found_type != BTRFS_EXTENT_DATA_KEY)
+		goto not_found;
+
+	found_type = btrfs_file_extent_type(leaf, item);
+	extent_start = found_key.offset;
+	if (found_type == BTRFS_FILE_EXTENT_REG) {
+		u64 extent_num_bytes;
+
+		extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
+		extent_end = extent_start + extent_num_bytes;
+		err = 0;
+
+		if (loops && start != extent_start)
+			goto not_found;
+
+		if (start < extent_start || start >= extent_end)
+			goto not_found;
+
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
+		if (bytenr == 0)
+			goto not_found;
+
+		if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
+			goto not_found;
+		/*
+		 * we may be called by the resizer, make sure we're inside
+		 * the limits of the FS
+		 */
+		block_group = btrfs_lookup_block_group(root->fs_info,
+						       bytenr);
+		if (!block_group || block_group->ro)
+			goto not_found;
+
+		bytenr += btrfs_file_extent_offset(leaf, item);
+		extent_num_bytes = min(end + 1, extent_end) - start;
+		ret = btrfs_add_ordered_extent(inode, start, bytenr,
+						extent_num_bytes, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+
+		btrfs_release_path(root, path);
+		start = extent_end;
+		if (start <= end) {
+			loops++;
+			goto again;
+		}
+	} else {
+not_found:
+		btrfs_end_transaction(trans, root);
+		btrfs_free_path(path);
+		return cow_file_range(inode, start, end);
+	}
+out:
+	WARN_ON(err);
+	btrfs_end_transaction(trans, root);
+	btrfs_free_path(path);
+	return err;
+}
+
+/*
+ * extent_io.c call back to do delayed allocation processing
+ */
+static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	if (btrfs_test_opt(root, NODATACOW) ||
+	    btrfs_test_flag(inode, NODATACOW))
+		ret = run_delalloc_nocow(inode, start, end);
+	else
+		ret = cow_file_range(inode, start, end);
+
+	return ret;
+}
+
+/*
+ * extent_io.c set_bit_hook, used to track delayed allocation
+ * bytes in this file, and to maintain the list of inodes that
+ * have pending delalloc work to be done.
+ */
+int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
+		       unsigned long old, unsigned long bits)
+{
+	unsigned long flags;
+	if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+		struct btrfs_root *root = BTRFS_I(inode)->root;
+		spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
+		BTRFS_I(inode)->delalloc_bytes += end - start + 1;
+		root->fs_info->delalloc_bytes += end - start + 1;
+		if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+				      &root->fs_info->delalloc_inodes);
+		}
+		spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
+	}
+	return 0;
+}
+
+/*
+ * extent_io.c clear_bit_hook, see set_bit_hook for why
+ */
+int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
+			 unsigned long old, unsigned long bits)
+{
+	if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+		struct btrfs_root *root = BTRFS_I(inode)->root;
+		unsigned long flags;
+
+		spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
+		if (end - start + 1 > root->fs_info->delalloc_bytes) {
+			printk("warning: delalloc account %Lu %Lu\n",
+			       end - start + 1, root->fs_info->delalloc_bytes);
+			root->fs_info->delalloc_bytes = 0;
+			BTRFS_I(inode)->delalloc_bytes = 0;
+		} else {
+			root->fs_info->delalloc_bytes -= end - start + 1;
+			BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
+		}
+		if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+		}
+		spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
+	}
+	return 0;
+}
+
+/*
+ * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
+ * we don't create bios that span stripes or chunks
+ */
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
+			 size_t size, struct bio *bio)
+{
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	struct btrfs_mapping_tree *map_tree;
+	u64 logical = (u64)bio->bi_sector << 9;
+	u64 length = 0;
+	u64 map_length;
+	int ret;
+
+	length = bio->bi_size;
+	map_tree = &root->fs_info->mapping_tree;
+	map_length = length;
+	ret = btrfs_map_block(map_tree, READ, logical,
+			      &map_length, NULL, 0);
+
+	if (map_length < length + size) {
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * in order to insert checksums into the metadata in large chunks,
+ * we wait until bio submission time.   All the pages in the bio are
+ * checksummed and sums are attached onto the ordered extent record.
+ *
+ * At IO completion time the sums attached to the ordered extent record
+ * are inserted into the btree
+ */
+int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+			  int mirror_num)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+
+	ret = btrfs_csum_one_bio(root, inode, bio);
+	BUG_ON(ret);
+
+	return btrfs_map_bio(root, rw, bio, mirror_num, 1);
+}
+
+/*
+ * extent_io.c submission hook.  This does the right thing for csum
+ * calculation on write, or reading the csums from the tree before a read.
+ */
+int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+			  int mirror_num)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	BUG_ON(ret);
+
+	if (btrfs_test_opt(root, NODATASUM) ||
+	    btrfs_test_flag(inode, NODATASUM)) {
+		goto mapit;
+	}
+
+	if (!(rw & (1 << BIO_RW))) {
+		btrfs_lookup_bio_sums(root, inode, bio);
+		goto mapit;
+	}
+	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+				   inode, rw, bio, mirror_num,
+				   __btrfs_submit_bio_hook);
+mapit:
+	return btrfs_map_bio(root, rw, bio, mirror_num, 0);
+}
+
+/*
+ * given a list of ordered sums, record them in the inode.  This happens
+ * at IO completion time based on sums calculated at bio submission time.
+ */
+static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
+			     struct inode *inode, u64 file_offset,
+			     struct list_head *list)
+{
+	struct list_head *cur;
+	struct btrfs_ordered_sum *sum;
+
+	btrfs_set_trans_block_group(trans, inode);
+	list_for_each(cur, list) {
+		sum = list_entry(cur, struct btrfs_ordered_sum, list);
+		btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
+				       inode, sum);
+	}
+	return 0;
+}
+
+int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
+{
+	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
+				   GFP_NOFS);
+}
+
+/* see btrfs_writepage_start_hook for details on why this is required */
+struct btrfs_writepage_fixup {
+	struct page *page;
+	struct btrfs_work work;
+};
+
+void btrfs_writepage_fixup_worker(struct btrfs_work *work)
+{
+	struct btrfs_writepage_fixup *fixup;
+	struct btrfs_ordered_extent *ordered;
+	struct page *page;
+	struct inode *inode;
+	u64 page_start;
+	u64 page_end;
+
+	fixup = container_of(work, struct btrfs_writepage_fixup, work);
+	page = fixup->page;
+again:
+	lock_page(page);
+	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
+		ClearPageChecked(page);
+		goto out_page;
+	}
+
+	inode = page->mapping->host;
+	page_start = page_offset(page);
+	page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
+
+	lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
+
+	/* already ordered? We're done */
+	if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+			     EXTENT_ORDERED, 0)) {
+		goto out;
+	}
+
+	ordered = btrfs_lookup_ordered_extent(inode, page_start);
+	if (ordered) {
+		unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
+			      page_end, GFP_NOFS);
+		unlock_page(page);
+		btrfs_start_ordered_extent(inode, ordered, 1);
+		goto again;
+	}
+
+	btrfs_set_extent_delalloc(inode, page_start, page_end);
+	ClearPageChecked(page);
+out:
+	unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
+out_page:
+	unlock_page(page);
+	page_cache_release(page);
+}
+
+/*
+ * There are a few paths in the higher layers of the kernel that directly
+ * set the page dirty bit without asking the filesystem if it is a
+ * good idea.  This causes problems because we want to make sure COW
+ * properly happens and the data=ordered rules are followed.
+ *
+ * In our case any range that doesn't have the EXTENT_ORDERED bit set
+ * hasn't been properly set up for IO.  We kick off an async process
+ * to fix it up.  The async helper will wait for ordered extents, set
+ * the delalloc bit and make it safe to write the page.
+ */
+int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
+{
+	struct inode *inode = page->mapping->host;
+	struct btrfs_writepage_fixup *fixup;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
+			     EXTENT_ORDERED, 0);
+	if (ret)
+		return 0;
+
+	if (PageChecked(page))
+		return -EAGAIN;
+
+	fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
+	if (!fixup)
+		return -EAGAIN;
+
+	SetPageChecked(page);
+	page_cache_get(page);
+	fixup->work.func = btrfs_writepage_fixup_worker;
+	fixup->page = page;
+	btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
+	return -EAGAIN;
+}
+
+/* as ordered data IO finishes, this gets called so we can finish
+ * an ordered extent if the range of bytes in the file it covers is
+ * fully written.
+ */
+static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_ordered_extent *ordered_extent;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_file_extent_item *extent_item;
+	struct btrfs_path *path = NULL;
+	struct extent_buffer *leaf;
+	u64 alloc_hint = 0;
+	struct list_head list;
+	struct btrfs_key ins;
+	int ret;
+
+	ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
+	if (!ret)
+		return 0;
+
+	trans = btrfs_join_transaction(root, 1);
+
+	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+	BUG_ON(!ordered_extent);
+	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
+		goto nocow;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	lock_extent(io_tree, ordered_extent->file_offset,
+		    ordered_extent->file_offset + ordered_extent->len - 1,
+		    GFP_NOFS);
+
+	INIT_LIST_HEAD(&list);
+
+	mutex_lock(&BTRFS_I(inode)->extent_mutex);
+
+	ret = btrfs_drop_extents(trans, root, inode,
+				 ordered_extent->file_offset,
+				 ordered_extent->file_offset +
+				 ordered_extent->len,
+				 ordered_extent->file_offset, &alloc_hint);
+	BUG_ON(ret);
+
+	ins.objectid = inode->i_ino;
+	ins.offset = ordered_extent->file_offset;
+	ins.type = BTRFS_EXTENT_DATA_KEY;
+	ret = btrfs_insert_empty_item(trans, root, path, &ins,
+				      sizeof(*extent_item));
+	BUG_ON(ret);
+	leaf = path->nodes[0];
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_file_extent_item);
+	btrfs_set_file_extent_generation(leaf, extent_item, trans->transid);
+	btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG);
+	btrfs_set_file_extent_disk_bytenr(leaf, extent_item,
+					  ordered_extent->start);
+	btrfs_set_file_extent_disk_num_bytes(leaf, extent_item,
+					     ordered_extent->len);
+	btrfs_set_file_extent_offset(leaf, extent_item, 0);
+	btrfs_set_file_extent_num_bytes(leaf, extent_item,
+					ordered_extent->len);
+	btrfs_mark_buffer_dirty(leaf);
+
+	btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
+				ordered_extent->file_offset +
+				ordered_extent->len - 1, 0);
+	mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+
+	ins.objectid = ordered_extent->start;
+	ins.offset = ordered_extent->len;
+	ins.type = BTRFS_EXTENT_ITEM_KEY;
+	ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
+					  root->root_key.objectid,
+					  trans->transid, inode->i_ino, &ins);
+	BUG_ON(ret);
+	btrfs_release_path(root, path);
+
+	inode_add_bytes(inode, ordered_extent->len);
+	unlock_extent(io_tree, ordered_extent->file_offset,
+		    ordered_extent->file_offset + ordered_extent->len - 1,
+		    GFP_NOFS);
+nocow:
+	add_pending_csums(trans, inode, ordered_extent->file_offset,
+			  &ordered_extent->list);
+
+	mutex_lock(&BTRFS_I(inode)->extent_mutex);
+	btrfs_ordered_update_i_size(inode, ordered_extent);
+	btrfs_update_inode(trans, root, inode);
+	btrfs_remove_ordered_extent(inode, ordered_extent);
+	mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+
+	/* once for us */
+	btrfs_put_ordered_extent(ordered_extent);
+	/* once for the tree */
+	btrfs_put_ordered_extent(ordered_extent);
+
+	btrfs_end_transaction(trans, root);
+	if (path)
+		btrfs_free_path(path);
+	return 0;
+}
+
+int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
+				struct extent_state *state, int uptodate)
+{
+	return btrfs_finish_ordered_io(page->mapping->host, start, end);
+}
+
+/*
+ * When IO fails, either with EIO or csum verification fails, we
+ * try other mirrors that might have a good copy of the data.  This
+ * io_failure_record is used to record state as we go through all the
+ * mirrors.  If another mirror has good data, the page is set up to date
+ * and things continue.  If a good mirror can't be found, the original
+ * bio end_io callback is called to indicate things have failed.
+ */
+struct io_failure_record {
+	struct page *page;
+	u64 start;
+	u64 len;
+	u64 logical;
+	int last_mirror;
+};
+
+int btrfs_io_failed_hook(struct bio *failed_bio,
+			 struct page *page, u64 start, u64 end,
+			 struct extent_state *state)
+{
+	struct io_failure_record *failrec = NULL;
+	u64 private;
+	struct extent_map *em;
+	struct inode *inode = page->mapping->host;
+	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct bio *bio;
+	int num_copies;
+	int ret;
+	int rw;
+	u64 logical;
+
+	ret = get_state_private(failure_tree, start, &private);
+	if (ret) {
+		failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
+		if (!failrec)
+			return -ENOMEM;
+		failrec->start = start;
+		failrec->len = end - start + 1;
+		failrec->last_mirror = 0;
+
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, start, failrec->len);
+		if (em && (em->start > start || em->start + em->len < start)) {
+			free_extent_map(em);
+			em = NULL;
+		}
+		spin_unlock(&em_tree->lock);
+
+		if (!em || IS_ERR(em)) {
+			kfree(failrec);
+			return -EIO;
+		}
+		logical = start - em->start;
+		logical = em->block_start + logical;
+		failrec->logical = logical;
+		free_extent_map(em);
+		set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
+				EXTENT_DIRTY, GFP_NOFS);
+		set_state_private(failure_tree, start,
+				 (u64)(unsigned long)failrec);
+	} else {
+		failrec = (struct io_failure_record *)(unsigned long)private;
+	}
+	num_copies = btrfs_num_copies(
+			      &BTRFS_I(inode)->root->fs_info->mapping_tree,
+			      failrec->logical, failrec->len);
+	failrec->last_mirror++;
+	if (!state) {
+		spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
+		state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
+						    failrec->start,
+						    EXTENT_LOCKED);
+		if (state && state->start != failrec->start)
+			state = NULL;
+		spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
+	}
+	if (!state || failrec->last_mirror > num_copies) {
+		set_state_private(failure_tree, failrec->start, 0);
+		clear_extent_bits(failure_tree, failrec->start,
+				  failrec->start + failrec->len - 1,
+				  EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
+		kfree(failrec);
+		return -EIO;
+	}
+	bio = bio_alloc(GFP_NOFS, 1);
+	bio->bi_private = state;
+	bio->bi_end_io = failed_bio->bi_end_io;
+	bio->bi_sector = failrec->logical >> 9;
+	bio->bi_bdev = failed_bio->bi_bdev;
+	bio->bi_size = 0;
+	bio_add_page(bio, page, failrec->len, start - page_offset(page));
+	if (failed_bio->bi_rw & (1 << BIO_RW))
+		rw = WRITE;
+	else
+		rw = READ;
+
+	BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
+						      failrec->last_mirror);
+	return 0;
+}
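(Aside, not part of this commit: the failure record pointer is stashed in the extent state's u64 private slot by casting through unsigned long, and recovered the same way on the next retry, so no separate lookup structure is needed.  A sketch of the round trip, with illustrative helper names:)

	/* illustrative sketch only: pointer <-> u64 private round trip used
	 * with set_state_private()/get_state_private() above */
	static inline u64 failrec_to_private(struct io_failure_record *failrec)
	{
		return (u64)(unsigned long)failrec;
	}

	static inline struct io_failure_record *private_to_failrec(u64 private)
	{
		return (struct io_failure_record *)(unsigned long)private;
	}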
+
+/*
+ * each time an IO finishes, we do a fast check in the IO failure tree
+ * to see if we need to process or clean up an io_failure_record
+ */
+int btrfs_clean_io_failures(struct inode *inode, u64 start)
+{
+	u64 private;
+	u64 private_failure;
+	struct io_failure_record *failure;
+	int ret;
+
+	private = 0;
+	if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
+			     (u64)-1, 1, EXTENT_DIRTY)) {
+		ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
+					start, &private_failure);
+		if (ret == 0) {
+			failure = (struct io_failure_record *)(unsigned long)
+				   private_failure;
+			set_state_private(&BTRFS_I(inode)->io_failure_tree,
+					  failure->start, 0);
+			clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
+					  failure->start,
+					  failure->start + failure->len - 1,
+					  EXTENT_DIRTY | EXTENT_LOCKED,
+					  GFP_NOFS);
+			kfree(failure);
+		}
+	}
+	return 0;
+}
+
+/*
+ * when reads are done, we need to check csums to verify the data is correct.
+ * If there's a match, we allow the bio to finish.  If not, we go through
+ * the io_failure_record routines to find good copies
+ */
+int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
+			       struct extent_state *state)
+{
+	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+	struct inode *inode = page->mapping->host;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	char *kaddr;
+	u64 private = ~(u32)0;
+	int ret;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	u32 csum = ~(u32)0;
+	unsigned long flags;
+
+	if (btrfs_test_opt(root, NODATASUM) ||
+	    btrfs_test_flag(inode, NODATASUM))
+		return 0;
+	if (state && state->start == start) {
+		private = state->private;
+		ret = 0;
+	} else {
+		ret = get_state_private(io_tree, start, &private);
+	}
+	local_irq_save(flags);
+	kaddr = kmap_atomic(page, KM_IRQ0);
+	if (ret) {
+		goto zeroit;
+	}
+	csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
+	btrfs_csum_final(csum, (char *)&csum);
+	if (csum != private) {
+		goto zeroit;
+	}
+	kunmap_atomic(kaddr, KM_IRQ0);
+	local_irq_restore(flags);
+
+	/* if the io failure tree for this inode is non-empty,
+	 * check to see if we've recovered from a failed IO
+	 */
+	btrfs_clean_io_failures(inode, start);
+	return 0;
+
+zeroit:
+	printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
+	       page->mapping->host->i_ino, (unsigned long long)start, csum,
+	       private);
+	memset(kaddr + offset, 1, end - start + 1);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_IRQ0);
+	local_irq_restore(flags);
+	if (private == 0)
+		return 0;
+	return -EIO;
+}
+
+/*
+ * This creates an orphan entry for the given inode in case something goes
+ * wrong in the middle of an unlink/truncate.
+ */
+int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+
+	spin_lock(&root->list_lock);
+
+	/* already on the orphan list, we're good */
+	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+		spin_unlock(&root->list_lock);
+		return 0;
+	}
+
+	list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+
+	spin_unlock(&root->list_lock);
+
+	/*
+	 * insert an orphan item to track this unlinked/truncated file
+	 */
+	ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+
+	return ret;
+}
+
+/*
+ * We have done the truncate/delete so we can go ahead and remove the orphan
+ * item for this particular inode.
+ */
+int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+
+	spin_lock(&root->list_lock);
+
+	if (list_empty(&BTRFS_I(inode)->i_orphan)) {
+		spin_unlock(&root->list_lock);
+		return 0;
+	}
+
+	list_del_init(&BTRFS_I(inode)->i_orphan);
+	if (!trans) {
+		spin_unlock(&root->list_lock);
+		return 0;
+	}
+
+	spin_unlock(&root->list_lock);
+
+	ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+
+	return ret;
+}
+
+/*
+ * this cleans up any orphans that may be left on the list from the last use
+ * of this root.
+ */
+void btrfs_orphan_cleanup(struct btrfs_root *root)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_item *item;
+	struct btrfs_key key, found_key;
+	struct btrfs_trans_handle *trans;
+	struct inode *inode;
+	int ret = 0, nr_unlink = 0, nr_truncate = 0;
+
+	/* don't do orphan cleanup if the fs is readonly. */
+	if (root->fs_info->sb->s_flags & MS_RDONLY)
+		return;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return;
+	path->reada = -1;
+
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
+	key.offset = (u64)-1;
+
+
+	while (1) {
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0) {
+			printk(KERN_ERR "Error searching slot for orphan: %d"
+			       "\n", ret);
+			break;
+		}
+
+		/*
+		 * ret == 0 means we found what we were searching for, which
+		 * is weird, but possible, so only screw with the path if we
+		 * didn't find the key and see if we have stuff that matches
+		 */
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+
+		/* pull out the item */
+		leaf = path->nodes[0];
+		item = btrfs_item_nr(leaf, path->slots[0]);
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+		/* make sure the item matches what we want */
+		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
+			break;
+		if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY)
+			break;
+
+		/* release the path since we're done with it */
+		btrfs_release_path(root, path);
+
+		/*
+		 * this is basically a btrfs_lookup, minus the root-crossing
+		 * logic.  we store the inode number in the
+		 * offset of the orphan item.
+		 */
+		inode = btrfs_iget_locked(root->fs_info->sb,
+					  found_key.offset, root);
+		if (!inode)
+			break;
+
+		if (inode->i_state & I_NEW) {
+			BTRFS_I(inode)->root = root;
+
+			/* have to set the location manually */
+			BTRFS_I(inode)->location.objectid = inode->i_ino;
+			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+			BTRFS_I(inode)->location.offset = 0;
+
+			btrfs_read_locked_inode(inode);
+			unlock_new_inode(inode);
+		}
+
+		/*
+		 * add this inode to the orphan list so btrfs_orphan_del does
+		 * the proper thing when we hit it
+		 */
+		spin_lock(&root->list_lock);
+		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+		spin_unlock(&root->list_lock);
+
+		/*
+		 * if this is a bad inode, means we actually succeeded in
+		 * removing the inode, but not the orphan record, which means
+		 * we need to manually delete the orphan since iput will just
+		 * do a destroy_inode
+		 */
+		if (is_bad_inode(inode)) {
+			trans = btrfs_start_transaction(root, 1);
+			btrfs_orphan_del(trans, inode);
+			btrfs_end_transaction(trans, root);
+			iput(inode);
+			continue;
+		}
+
+		/* if we have links, this was a truncate, let's do that */
+		if (inode->i_nlink) {
+			nr_truncate++;
+			btrfs_truncate(inode);
+		} else {
+			nr_unlink++;
+		}
+
+		/* this will do delete_inode and everything for us */
+		iput(inode);
+	}
+
+	if (nr_unlink)
+		printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
+	if (nr_truncate)
+		printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+
+	btrfs_free_path(path);
+}
+
+/*
+ * read an inode from the btree into the in-memory inode
+ */
+void btrfs_read_locked_inode(struct inode *inode)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_timespec *tspec;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_key location;
+	u64 alloc_group_block;
+	u32 rdev;
+	int ret;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
+
+	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
+	if (ret)
+		goto make_bad;
+
+	leaf = path->nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_inode_item);
+
+	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
+	inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
+	inode->i_uid = btrfs_inode_uid(leaf, inode_item);
+	inode->i_gid = btrfs_inode_gid(leaf, inode_item);
+	btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
+
+	tspec = btrfs_inode_atime(inode_item);
+	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
+	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+
+	tspec = btrfs_inode_mtime(inode_item);
+	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
+	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+
+	tspec = btrfs_inode_ctime(inode_item);
+	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
+	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+
+	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
+	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
+	inode->i_generation = BTRFS_I(inode)->generation;
+	inode->i_rdev = 0;
+	rdev = btrfs_inode_rdev(leaf, inode_item);
+
+	BTRFS_I(inode)->index_cnt = (u64)-1;
+
+	alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
+	BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
+						       alloc_group_block);
+	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+	if (!BTRFS_I(inode)->block_group) {
+		BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
+						 NULL, 0,
+						 BTRFS_BLOCK_GROUP_METADATA, 0);
+	}
+	btrfs_free_path(path);
+	inode_item = NULL;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_mapping->a_ops = &btrfs_aops;
+		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+		inode->i_fop = &btrfs_file_operations;
+		inode->i_op = &btrfs_file_inode_operations;
+		break;
+	case S_IFDIR:
+		inode->i_fop = &btrfs_dir_file_operations;
+		if (root == root->fs_info->tree_root)
+			inode->i_op = &btrfs_dir_ro_inode_operations;
+		else
+			inode->i_op = &btrfs_dir_inode_operations;
+		break;
+	case S_IFLNK:
+		inode->i_op = &btrfs_symlink_inode_operations;
+		inode->i_mapping->a_ops = &btrfs_symlink_aops;
+		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+		break;
+	default:
+		init_special_inode(inode, inode->i_mode, rdev);
+		break;
+	}
+	return;
+
+make_bad:
+	btrfs_free_path(path);
+	make_bad_inode(inode);
+}
+
+/*
+ * given a leaf and an inode, copy the inode fields into the leaf
+ */
+static void fill_inode_item(struct btrfs_trans_handle *trans,
+			    struct extent_buffer *leaf,
+			    struct btrfs_inode_item *item,
+			    struct inode *inode)
+{
+	btrfs_set_inode_uid(leaf, item, inode->i_uid);
+	btrfs_set_inode_gid(leaf, item, inode->i_gid);
+	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
+	btrfs_set_inode_mode(leaf, item, inode->i_mode);
+	btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+
+	btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
+			       inode->i_atime.tv_sec);
+	btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
+				inode->i_atime.tv_nsec);
+
+	btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
+			       inode->i_mtime.tv_sec);
+	btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
+				inode->i_mtime.tv_nsec);
+
+	btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
+			       inode->i_ctime.tv_sec);
+	btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
+				inode->i_ctime.tv_nsec);
+
+	btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
+	btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
+	btrfs_set_inode_transid(leaf, item, trans->transid);
+	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
+	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
+	btrfs_set_inode_block_group(leaf, item,
+				    BTRFS_I(inode)->block_group->key.objectid);
+}
+
+/*
+ * copy everything in the in-memory inode into the btree.
+ */
+int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct inode *inode)
+{
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_lookup_inode(trans, root, path,
+				 &BTRFS_I(inode)->location, 1);
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
+		goto failed;
+	}
+
+	leaf = path->nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path->slots[0],
+				  struct btrfs_inode_item);
+
+	fill_inode_item(trans, leaf, inode_item, inode);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_set_inode_last_trans(trans, inode);
+	ret = 0;
+failed:
+	btrfs_free_path(path);
+	return ret;
+}
+
+
+/*
+ * unlink helper that gets used here in inode.c and in the tree logging
+ * recovery code.  It removes a link in a directory with a given name, and
+ * also drops the back refs in the inode to the directory
+ */
+int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root,
+		       struct inode *dir, struct inode *inode,
+		       const char *name, int name_len)
+{
+	struct btrfs_path *path;
+	int ret = 0;
+	struct extent_buffer *leaf;
+	struct btrfs_dir_item *di;
+	struct btrfs_key key;
+	u64 index;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+				    name, name_len, -1);
+	if (IS_ERR(di)) {
+		ret = PTR_ERR(di);
+		goto err;
+	}
+	if (!di) {
+		ret = -ENOENT;
+		goto err;
+	}
+	leaf = path->nodes[0];
+	btrfs_dir_item_key_to_cpu(leaf, di, &key);
+	ret = btrfs_delete_one_dir_name(trans, root, path, di);
+	if (ret)
+		goto err;
+	btrfs_release_path(root, path);
+
+	ret = btrfs_del_inode_ref(trans, root, name, name_len,
+				  inode->i_ino,
+				  dir->i_ino, &index);
+	if (ret) {
+		printk("failed to delete reference to %.*s, "
+		       "inode %lu parent %lu\n", name_len, name,
+		       inode->i_ino, dir->i_ino);
+		goto err;
+	}
+
+	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+					 index, name, name_len, -1);
+	if (IS_ERR(di)) {
+		ret = PTR_ERR(di);
+		goto err;
+	}
+	if (!di) {
+		ret = -ENOENT;
+		goto err;
+	}
+	ret = btrfs_delete_one_dir_name(trans, root, path, di);
+	btrfs_release_path(root, path);
+
+	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
+					 inode, dir->i_ino);
+	BUG_ON(ret != 0 && ret != -ENOENT);
+	if (ret != -ENOENT)
+		BTRFS_I(dir)->log_dirty_trans = trans->transid;
+
+	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
+					   dir, index);
+	BUG_ON(ret);
+err:
+	btrfs_free_path(path);
+	if (ret)
+		goto out;
+
+	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+	inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+	btrfs_update_inode(trans, root, dir);
+	btrfs_drop_nlink(inode);
+	ret = btrfs_update_inode(trans, root, inode);
+	dir->i_sb->s_dirt = 1;
+out:
+	return ret;
+}
+
+static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans;
+	struct inode *inode = dentry->d_inode;
+	int ret;
+	unsigned long nr = 0;
+
+	root = BTRFS_I(dir)->root;
+
+	ret = btrfs_check_free_space(root, 1, 1);
+	if (ret)
+		goto fail;
+
+	trans = btrfs_start_transaction(root, 1);
+
+	btrfs_set_trans_block_group(trans, dir);
+	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+				 dentry->d_name.name, dentry->d_name.len);
+
+	if (inode->i_nlink == 0)
+		ret = btrfs_orphan_add(trans, inode);
+
+	nr = trans->blocks_used;
+
+	btrfs_end_transaction_throttle(trans, root);
+fail:
+	btrfs_btree_balance_dirty(root, nr);
+	return ret;
+}
+
+static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int err = 0;
+	int ret;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_trans_handle *trans;
+	unsigned long nr = 0;
+
+	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
+		return -ENOTEMPTY;
+	}
+
+	ret = btrfs_check_free_space(root, 1, 1);
+	if (ret)
+		goto fail;
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, dir);
+
+	err = btrfs_orphan_add(trans, inode);
+	if (err)
+		goto fail_trans;
+
+	/* now the directory is empty */
+	err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+				 dentry->d_name.name, dentry->d_name.len);
+	if (!err) {
+		btrfs_i_size_write(inode, 0);
+	}
+
+fail_trans:
+	nr = trans->blocks_used;
+	ret = btrfs_end_transaction_throttle(trans, root);
+fail:
+	btrfs_btree_balance_dirty(root, nr);
+
+	if (ret && !err)
+		err = ret;
+	return err;
+}
+
+/*
+ * when truncating bytes in a file, it is possible to avoid reading
+ * the leaves that contain only checksum items.  This can be the
+ * majority of the IO required to delete a large file, but it must
+ * be done carefully.
+ *
+ * The keys in the level just above the leaves are checked to make sure
+ * the lowest key in a given leaf is a csum key, and starts at an offset
+ * after the new size.
+ *
+ * Then the key for the next leaf is checked to make sure it also has
+ * a checksum item for the same file.  If it does, we know our target leaf
+ * contains only checksum items, and it can be safely freed without reading
+ * it.
+ *
+ * This is just an optimization targeted at large files.  It may do
+ * nothing.  It will return 0 unless things went badly.
+ */
+static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct btrfs_path *path,
+				     struct inode *inode, u64 new_size)
+{
+	struct btrfs_key key;
+	int ret;
+	int nritems;
+	struct btrfs_key found_key;
+	struct btrfs_key other_key;
+	struct btrfs_leaf_ref *ref;
+	u64 leaf_gen;
+	u64 leaf_start;
+
+	path->lowest_level = 1;
+	key.objectid = inode->i_ino;
+	key.type = BTRFS_CSUM_ITEM_KEY;
+	key.offset = new_size;
+again:
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+
+	if (path->nodes[1] == NULL) {
+		ret = 0;
+		goto out;
+	}
+	ret = 0;
+	btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
+	nritems = btrfs_header_nritems(path->nodes[1]);
+
+	if (!nritems)
+		goto out;
+
+	if (path->slots[1] >= nritems)
+		goto next_node;
+
+	/* did we find a key greater than anything we want to delete? */
+	if (found_key.objectid > inode->i_ino ||
+	   (found_key.objectid == inode->i_ino && found_key.type > key.type))
+		goto out;
+
+	/* we check the next key in the node to make sure the leaf contains
+	 * only checksum items.  This comparison doesn't work if our
+	 * leaf is the last one in the node
+	 */
+	if (path->slots[1] + 1 >= nritems) {
+next_node:
+		/* search forward from the last key in the node, this
+		 * will bring us into the next node in the tree
+		 */
+		btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
+
+		/* unlikely, but we inc below, so check to be safe */
+		if (found_key.offset == (u64)-1)
+			goto out;
+
+		/* search_forward needs a path with locks held, do the
+		 * search again for the original key.  It is possible
+		 * this will race with a balance and return a path that
+		 * we could modify, but this drop is just an optimization
+		 * and is allowed to miss some leaves.
+		 */
+		btrfs_release_path(root, path);
+		found_key.offset++;
+
+		/* setup a max key for search_forward */
+		other_key.offset = (u64)-1;
+		other_key.type = key.type;
+		other_key.objectid = key.objectid;
+
+		path->keep_locks = 1;
+		ret = btrfs_search_forward(root, &found_key, &other_key,
+					   path, 0, 0);
+		path->keep_locks = 0;
+		if (ret || found_key.objectid != key.objectid ||
+		    found_key.type != key.type) {
+			ret = 0;
+			goto out;
+		}
+
+		key.offset = found_key.offset;
+		btrfs_release_path(root, path);
+		cond_resched();
+		goto again;
+	}
+
+	/* we know there's one more slot after us in the tree,
+	 * read that key so we can verify it is also a checksum item
+	 */
+	btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
+
+	if (found_key.objectid < inode->i_ino)
+		goto next_key;
+
+	if (found_key.type != key.type || found_key.offset < new_size)
+		goto next_key;
+
+	/*
+	 * if the key for the next leaf isn't a csum key from this objectid,
+	 * we can't be sure there aren't good items inside this leaf.
+	 * Bail out
+	 */
+	if (other_key.objectid != inode->i_ino || other_key.type != key.type)
+		goto out;
+
+	leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
+	leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
+	/*
+	 * it is safe to delete this leaf, it contains only
+	 * csum items from this inode at an offset >= new_size
+	 */
+	ret = btrfs_del_leaf(trans, root, path, leaf_start);
+	BUG_ON(ret);
+
+	if (root->ref_cows && leaf_gen < trans->transid) {
+		ref = btrfs_alloc_leaf_ref(root, 0);
+		if (ref) {
+			ref->root_gen = root->root_key.offset;
+			ref->bytenr = leaf_start;
+			ref->owner = 0;
+			ref->generation = leaf_gen;
+			ref->nritems = 0;
+
+			ret = btrfs_add_leaf_ref(root, ref, 0);
+			WARN_ON(ret);
+			btrfs_free_leaf_ref(root, ref);
+		} else {
+			WARN_ON(1);
+		}
+	}
+next_key:
+	btrfs_release_path(root, path);
+
+	if (other_key.objectid == inode->i_ino &&
+	    other_key.type == key.type && other_key.offset > key.offset) {
+		key.offset = other_key.offset;
+		cond_resched();
+		goto again;
+	}
+	ret = 0;
+out:
+	/* fixup any changes we've made to the path */
+	path->lowest_level = 0;
+	path->keep_locks = 0;
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+/*
+ * this can truncate away extent items, csum items and directory items.
+ * It starts at a high offset and removes keys until it can't find
+ * any higher than new_size
+ *
+ * csum items that cross the new i_size are truncated to the new size
+ * as well.
+ *
+ * min_type is the minimum key type to truncate down to.  If set to 0, this
+ * will kill all the items on this inode, including the INODE_ITEM_KEY.
+ */
+noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct inode *inode,
+					u64 new_size, u32 min_type)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	u32 found_type;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	u64 extent_start = 0;
+	u64 extent_num_bytes = 0;
+	u64 item_end = 0;
+	u64 root_gen = 0;
+	u64 root_owner = 0;
+	int found_extent;
+	int del_item;
+	int pending_del_nr = 0;
+	int pending_del_slot = 0;
+	int extent_type = -1;
+	u64 mask = root->sectorsize - 1;
+
+	if (root->ref_cows)
+		btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	path->reada = -1;
+
+	/* FIXME, add redo link to tree so we don't leak on crash */
+	key.objectid = inode->i_ino;
+	key.offset = (u64)-1;
+	key.type = (u8)-1;
+
+	btrfs_init_path(path);
+
+	ret = drop_csum_leaves(trans, root, path, inode, new_size);
+	BUG_ON(ret);
+
+search_again:
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0) {
+		goto error;
+	}
+	if (ret > 0) {
+		/* there are no items in the tree for us to truncate, we're
+		 * done
+		 */
+		if (path->slots[0] == 0) {
+			ret = 0;
+			goto error;
+		}
+		path->slots[0]--;
+	}
+
+	while(1) {
+		fi = NULL;
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		found_type = btrfs_key_type(&found_key);
+
+		if (found_key.objectid != inode->i_ino)
+			break;
+
+		if (found_type < min_type)
+			break;
+
+		item_end = found_key.offset;
+		if (found_type == BTRFS_EXTENT_DATA_KEY) {
+			fi = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_file_extent_item);
+			extent_type = btrfs_file_extent_type(leaf, fi);
+			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
+				item_end +=
+				    btrfs_file_extent_num_bytes(leaf, fi);
+			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+				struct btrfs_item *item = btrfs_item_nr(leaf,
+							        path->slots[0]);
+				item_end += btrfs_file_extent_inline_len(leaf,
+									 item);
+			}
+			item_end--;
+		}
+		if (found_type == BTRFS_CSUM_ITEM_KEY) {
+			ret = btrfs_csum_truncate(trans, root, path,
+						  new_size);
+			BUG_ON(ret);
+		}
+		if (item_end < new_size) {
+			if (found_type == BTRFS_DIR_ITEM_KEY) {
+				found_type = BTRFS_INODE_ITEM_KEY;
+			} else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
+				found_type = BTRFS_CSUM_ITEM_KEY;
+			} else if (found_type == BTRFS_EXTENT_DATA_KEY) {
+				found_type = BTRFS_XATTR_ITEM_KEY;
+			} else if (found_type == BTRFS_XATTR_ITEM_KEY) {
+				found_type = BTRFS_INODE_REF_KEY;
+			} else if (found_type) {
+				found_type--;
+			} else {
+				break;
+			}
+			btrfs_set_key_type(&key, found_type);
+			goto next;
+		}
+		if (found_key.offset >= new_size)
+			del_item = 1;
+		else
+			del_item = 0;
+		found_extent = 0;
+
+		/* FIXME, shrink the extent if the ref count is only 1 */
+		if (found_type != BTRFS_EXTENT_DATA_KEY)
+			goto delete;
+
+		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
+			u64 num_dec;
+			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
+			if (!del_item) {
+				u64 orig_num_bytes =
+					btrfs_file_extent_num_bytes(leaf, fi);
+				extent_num_bytes = new_size -
+					found_key.offset + root->sectorsize - 1;
+				extent_num_bytes = extent_num_bytes &
+					~((u64)root->sectorsize - 1);
+				btrfs_set_file_extent_num_bytes(leaf, fi,
+							 extent_num_bytes);
+				num_dec = (orig_num_bytes -
+					   extent_num_bytes);
+				if (root->ref_cows && extent_start != 0)
+					inode_sub_bytes(inode, num_dec);
+				btrfs_mark_buffer_dirty(leaf);
+			} else {
+				extent_num_bytes =
+					btrfs_file_extent_disk_num_bytes(leaf,
+									 fi);
+				/* FIXME blocksize != 4096 */
+				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
+				if (extent_start != 0) {
+					found_extent = 1;
+					if (root->ref_cows)
+						inode_sub_bytes(inode, num_dec);
+				}
+				root_gen = btrfs_header_generation(leaf);
+				root_owner = btrfs_header_owner(leaf);
+			}
+		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+			if (!del_item) {
+				u32 size = new_size - found_key.offset;
+
+				if (root->ref_cows) {
+					inode_sub_bytes(inode, item_end + 1 -
+							new_size);
+				}
+				size =
+				    btrfs_file_extent_calc_inline_size(size);
+				ret = btrfs_truncate_item(trans, root, path,
+							  size, 1);
+				BUG_ON(ret);
+			} else if (root->ref_cows) {
+				inode_sub_bytes(inode, item_end + 1 -
+						found_key.offset);
+			}
+		}
+delete:
+		if (del_item) {
+			if (!pending_del_nr) {
+				/* no pending yet, add ourselves */
+				pending_del_slot = path->slots[0];
+				pending_del_nr = 1;
+			} else if (pending_del_nr &&
+				   path->slots[0] + 1 == pending_del_slot) {
+				/* hop on the pending chunk */
+				pending_del_nr++;
+				pending_del_slot = path->slots[0];
+			} else {
+				printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
+			}
+		} else {
+			break;
+		}
+		if (found_extent) {
+			ret = btrfs_free_extent(trans, root, extent_start,
+						extent_num_bytes,
+						leaf->start, root_owner,
+						root_gen, inode->i_ino, 0);
+			BUG_ON(ret);
+		}
+next:
+		if (path->slots[0] == 0) {
+			if (pending_del_nr)
+				goto del_pending;
+			btrfs_release_path(root, path);
+			goto search_again;
+		}
+
+		path->slots[0]--;
+		if (pending_del_nr &&
+		    path->slots[0] + 1 != pending_del_slot) {
+			struct btrfs_key debug;
+del_pending:
+			btrfs_item_key_to_cpu(path->nodes[0], &debug,
+					      pending_del_slot);
+			ret = btrfs_del_items(trans, root, path,
+					      pending_del_slot,
+					      pending_del_nr);
+			BUG_ON(ret);
+			pending_del_nr = 0;
+			btrfs_release_path(root, path);
+			goto search_again;
+		}
+	}
+	ret = 0;
+error:
+	if (pending_del_nr) {
+		ret = btrfs_del_items(trans, root, path, pending_del_slot,
+				      pending_del_nr);
+	}
+	btrfs_free_path(path);
+	inode->i_sb->s_dirt = 1;
+	return ret;
+}
+
+/*
+ * taken from block_truncate_page, but does cow as it zeros out
+ * any bytes left in the last page in the file.
+ */
+static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
+{
+	struct inode *inode = mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_ordered_extent *ordered;
+	char *kaddr;
+	u32 blocksize = root->sectorsize;
+	pgoff_t index = from >> PAGE_CACHE_SHIFT;
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	struct page *page;
+	int ret = 0;
+	u64 page_start;
+	u64 page_end;
+
+	if ((offset & (blocksize - 1)) == 0)
+		goto out;
+
+	ret = -ENOMEM;
+again:
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		goto out;
+
+	page_start = page_offset(page);
+	page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+	if (!PageUptodate(page)) {
+		ret = btrfs_readpage(NULL, page);
+		lock_page(page);
+		if (page->mapping != mapping) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto again;
+		}
+		if (!PageUptodate(page)) {
+			ret = -EIO;
+			goto out_unlock;
+		}
+	}
+	wait_on_page_writeback(page);
+
+	lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+	set_page_extent_mapped(page);
+
+	ordered = btrfs_lookup_ordered_extent(inode, page_start);
+	if (ordered) {
+		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_page(page);
+		page_cache_release(page);
+		btrfs_start_ordered_extent(inode, ordered, 1);
+		btrfs_put_ordered_extent(ordered);
+		goto again;
+	}
+
+	btrfs_set_extent_delalloc(inode, page_start, page_end);
+	ret = 0;
+	if (offset != PAGE_CACHE_SIZE) {
+		kaddr = kmap(page);
+		memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+		flush_dcache_page(page);
+		kunmap(page);
+	}
+	ClearPageChecked(page);
+	set_page_dirty(page);
+	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+
+out_unlock:
+	unlock_page(page);
+	page_cache_release(page);
+out:
+	return ret;
+}
+
+static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
+
+	if (S_ISREG(inode->i_mode) &&
+	    attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
+		struct btrfs_trans_handle *trans;
+		struct btrfs_root *root = BTRFS_I(inode)->root;
+		struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+
+		u64 mask = root->sectorsize - 1;
+		u64 hole_start = (inode->i_size + mask) & ~mask;
+		u64 block_end = (attr->ia_size + mask) & ~mask;
+		u64 hole_size;
+		u64 alloc_hint = 0;
+
+		if (attr->ia_size <= hole_start)
+			goto out;
+
+		err = btrfs_check_free_space(root, 1, 0);
+		if (err)
+			goto fail;
+
+		btrfs_truncate_page(inode->i_mapping, inode->i_size);
+
+		hole_size = block_end - hole_start;
+		while(1) {
+			struct btrfs_ordered_extent *ordered;
+			btrfs_wait_ordered_range(inode, hole_start, hole_size);
+
+			lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
+			ordered = btrfs_lookup_ordered_extent(inode, hole_start);
+			if (ordered) {
+				unlock_extent(io_tree, hole_start,
+					      block_end - 1, GFP_NOFS);
+				btrfs_put_ordered_extent(ordered);
+			} else {
+				break;
+			}
+		}
+
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_set_trans_block_group(trans, inode);
+		mutex_lock(&BTRFS_I(inode)->extent_mutex);
+		err = btrfs_drop_extents(trans, root, inode,
+					 hole_start, block_end, hole_start,
+					 &alloc_hint);
+
+		if (alloc_hint != EXTENT_MAP_INLINE) {
+			err = btrfs_insert_file_extent(trans, root,
+						       inode->i_ino,
+						       hole_start, 0, 0,
+						       hole_size, 0);
+			btrfs_drop_extent_cache(inode, hole_start,
+						(u64)-1, 0);
+			btrfs_check_file(root, inode);
+		}
+		mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+		btrfs_end_transaction(trans, root);
+		unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
+		if (err)
+			return err;
+	}
+out:
+	err = inode_setattr(inode, attr);
+
+	if (!err && ((attr->ia_valid & ATTR_MODE)))
+		err = btrfs_acl_chmod(inode);
+fail:
+	return err;
+}
+
+void btrfs_delete_inode(struct inode *inode)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	unsigned long nr;
+	int ret;
+
+	truncate_inode_pages(&inode->i_data, 0);
+	if (is_bad_inode(inode)) {
+		btrfs_orphan_del(NULL, inode);
+		goto no_delete;
+	}
+	btrfs_wait_ordered_range(inode, 0, (u64)-1);
+
+	btrfs_i_size_write(inode, 0);
+	trans = btrfs_start_transaction(root, 1);
+
+	btrfs_set_trans_block_group(trans, inode);
+	ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
+	if (ret) {
+		btrfs_orphan_del(NULL, inode);
+		goto no_delete_lock;
+	}
+
+	btrfs_orphan_del(trans, inode);
+
+	nr = trans->blocks_used;
+	clear_inode(inode);
+
+	btrfs_end_transaction(trans, root);
+	btrfs_btree_balance_dirty(root, nr);
+	return;
+
+no_delete_lock:
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+	btrfs_btree_balance_dirty(root, nr);
+no_delete:
+	clear_inode(inode);
+}
+
+/*
+ * this returns the key found in the dir entry in the location pointer.
+ * If no dir entries were found, location->objectid is 0.
+ */
+static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
+			       struct btrfs_key *location)
+{
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	struct btrfs_dir_item *di;
+	struct btrfs_path *path;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
+				    namelen, 0);
+	if (IS_ERR(di))
+		ret = PTR_ERR(di);
+	if (!di || IS_ERR(di)) {
+		goto out_err;
+	}
+	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
+out:
+	btrfs_free_path(path);
+	return ret;
+out_err:
+	location->objectid = 0;
+	goto out;
+}
+
+/*
+ * when we hit a tree root in a directory, the btrfs part of the inode
+ * needs to be changed to reflect the root directory of the tree root.  This
+ * is kind of like crossing a mount point.
+ */
+static int fixup_tree_root_location(struct btrfs_root *root,
+			     struct btrfs_key *location,
+			     struct btrfs_root **sub_root,
+			     struct dentry *dentry)
+{
+	struct btrfs_root_item *ri;
+
+	if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
+		return 0;
+	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
+		return 0;
+
+	*sub_root = btrfs_read_fs_root(root->fs_info, location,
+					dentry->d_name.name,
+					dentry->d_name.len);
+	if (IS_ERR(*sub_root))
+		return PTR_ERR(*sub_root);
+
+	ri = &(*sub_root)->root_item;
+	location->objectid = btrfs_root_dirid(ri);
+	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
+	location->offset = 0;
+
+	return 0;
+}
+
+static noinline void init_btrfs_i(struct inode *inode)
+{
+	struct btrfs_inode *bi = BTRFS_I(inode);
+
+	bi->i_acl = NULL;
+	bi->i_default_acl = NULL;
+
+	bi->generation = 0;
+	bi->last_trans = 0;
+	bi->logged_trans = 0;
+	bi->delalloc_bytes = 0;
+	bi->disk_i_size = 0;
+	bi->flags = 0;
+	bi->index_cnt = (u64)-1;
+	bi->log_dirty_trans = 0;
+	extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
+			     inode->i_mapping, GFP_NOFS);
+	extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
+			     inode->i_mapping, GFP_NOFS);
+	INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
+	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
+	mutex_init(&BTRFS_I(inode)->csum_mutex);
+	mutex_init(&BTRFS_I(inode)->extent_mutex);
+	mutex_init(&BTRFS_I(inode)->log_mutex);
+}
+
+static int btrfs_init_locked_inode(struct inode *inode, void *p)
+{
+	struct btrfs_iget_args *args = p;
+	inode->i_ino = args->ino;
+	init_btrfs_i(inode);
+	BTRFS_I(inode)->root = args->root;
+	return 0;
+}
+
+static int btrfs_find_actor(struct inode *inode, void *opaque)
+{
+	struct btrfs_iget_args *args = opaque;
+	return (args->ino == inode->i_ino &&
+		args->root == BTRFS_I(inode)->root);
+}
+
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+			    struct btrfs_root *root, int wait)
+{
+	struct inode *inode;
+	struct btrfs_iget_args args;
+	args.ino = objectid;
+	args.root = root;
+
+	if (wait) {
+		inode = ilookup5(s, objectid, btrfs_find_actor,
+				 (void *)&args);
+	} else {
+		inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
+					(void *)&args);
+	}
+	return inode;
+}
+
+struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
+				struct btrfs_root *root)
+{
+	struct inode *inode;
+	struct btrfs_iget_args args;
+	args.ino = objectid;
+	args.root = root;
+
+	inode = iget5_locked(s, objectid, btrfs_find_actor,
+			     btrfs_init_locked_inode,
+			     (void *)&args);
+	return inode;
+}
+
+/* Get an inode object given its location and corresponding root.
+ * Returns in *is_new whether the inode was read from disk
+ */
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+			 struct btrfs_root *root, int *is_new)
+{
+	struct inode *inode;
+
+	inode = btrfs_iget_locked(s, location->objectid, root);
+	if (!inode)
+		return ERR_PTR(-EACCES);
+
+	if (inode->i_state & I_NEW) {
+		BTRFS_I(inode)->root = root;
+		memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
+		btrfs_read_locked_inode(inode);
+		unlock_new_inode(inode);
+		if (is_new)
+			*is_new = 1;
+	} else {
+		if (is_new)
+			*is_new = 0;
+	}
+
+	return inode;
+}
+
+static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
+				   struct nameidata *nd)
+{
+	struct inode *inode;
+	struct btrfs_inode *bi = BTRFS_I(dir);
+	struct btrfs_root *root = bi->root;
+	struct btrfs_root *sub_root = root;
+	struct btrfs_key location;
+	int ret, new, do_orphan = 0;
+
+	if (dentry->d_name.len > BTRFS_NAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	ret = btrfs_inode_by_name(dir, dentry, &location);
+
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	inode = NULL;
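+	/*
+	 * a zero objectid from btrfs_inode_by_name means no dir entry was
+	 * found; fall through with a NULL inode so d_splice_alias hands
+	 * back a negative dentry
+	 */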
+	if (location.objectid) {
+		ret = fixup_tree_root_location(root, &location, &sub_root,
+						dentry);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		if (ret > 0)
+			return ERR_PTR(-ENOENT);
+		inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
+		if (IS_ERR(inode))
+			return ERR_CAST(inode);
+
+		/* the inode and parent dir are two different roots */
+		if (new && root != sub_root) {
+			igrab(inode);
+			sub_root->inode = inode;
+			do_orphan = 1;
+		}
+	}
+
+	if (unlikely(do_orphan))
+		btrfs_orphan_cleanup(sub_root);
+
+	return d_splice_alias(inode, dentry);
+}
+
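+/*
+ * map the on-disk dir entry type (as returned by btrfs_dir_type) to the
+ * DT_* values filldir expects
+ */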
+static unsigned char btrfs_filetype_table[] = {
+	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+};
+
+static int btrfs_real_readdir(struct file *filp, void *dirent,
+			      filldir_t filldir)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_item *item;
+	struct btrfs_dir_item *di;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_path *path;
+	int ret;
+	u32 nritems;
+	struct extent_buffer *leaf;
+	int slot;
+	int advance;
+	unsigned char d_type;
+	int over = 0;
+	u32 di_cur;
+	u32 di_total;
+	u32 di_len;
+	int key_type = BTRFS_DIR_INDEX_KEY;
+	char tmp_name[32];
+	char *name_ptr;
+	int name_len;
+
+	/* FIXME, use a real flag for deciding about the key type */
+	if (root->fs_info->tree_root == root)
+		key_type = BTRFS_DIR_ITEM_KEY;
+
+	/* special case for "." */
+	if (filp->f_pos == 0) {
+		over = filldir(dirent, ".", 1,
+			       1, inode->i_ino,
+			       DT_DIR);
+		if (over)
+			return 0;
+		filp->f_pos = 1;
+	}
+	/* special case for .., just use the back ref */
+	if (filp->f_pos == 1) {
+		u64 pino = parent_ino(filp->f_path.dentry);
+		over = filldir(dirent, "..", 2,
+			       2, pino, DT_DIR);
+		if (over)
+			return 0;
+		filp->f_pos = 2;
+	}
+
+	path = btrfs_alloc_path();
+	path->reada = 2;
+
+	btrfs_set_key_type(&key, key_type);
+	key.offset = filp->f_pos;
+	key.objectid = inode->i_ino;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto err;
+	advance = 0;
+
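+	/*
+	 * walk forward from the search position, emitting every dir entry
+	 * that still belongs to this inode and key type
+	 */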
+	while (1) {
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		slot = path->slots[0];
+		if (advance || slot >= nritems) {
+			if (slot >= nritems - 1) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret)
+					break;
+				leaf = path->nodes[0];
+				nritems = btrfs_header_nritems(leaf);
+				slot = path->slots[0];
+			} else {
+				slot++;
+				path->slots[0]++;
+			}
+		}
+		advance = 1;
+		item = btrfs_item_nr(leaf, slot);
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		if (found_key.objectid != key.objectid)
+			break;
+		if (btrfs_key_type(&found_key) != key_type)
+			break;
+		if (found_key.offset < filp->f_pos)
+			continue;
+
+		filp->f_pos = found_key.offset;
+
+		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+		di_cur = 0;
+		di_total = btrfs_item_size(leaf, item);
+
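+		/*
+		 * step through the dir entries packed inside this item; each
+		 * one is sizeof(*di) plus its name and data lengths
+		 */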
+		while (di_cur < di_total) {
+			struct btrfs_key location;
+
+			name_len = btrfs_dir_name_len(leaf, di);
+			if (name_len <= sizeof(tmp_name)) {
+				name_ptr = tmp_name;
+			} else {
+				name_ptr = kmalloc(name_len, GFP_NOFS);
+				if (!name_ptr) {
+					ret = -ENOMEM;
+					goto err;
+				}
+			}
+			read_extent_buffer(leaf, name_ptr,
+					   (unsigned long)(di + 1), name_len);
+
+			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+			btrfs_dir_item_key_to_cpu(leaf, di, &location);
+			over = filldir(dirent, name_ptr, name_len,
+				       found_key.offset, location.objectid,
+				       d_type);
+
+			if (name_ptr != tmp_name)
+				kfree(name_ptr);
+
+			if (over)
+				goto nopos;
+
+			di_len = btrfs_dir_name_len(leaf, di) +
+				 btrfs_dir_data_len(leaf, di) + sizeof(*di);
+			di_cur += di_len;
+			di = (struct btrfs_dir_item *)((char *)di + di_len);
+		}
+	}
+
+	/* Reached end of directory/root. Bump pos past the last item. */
+	if (key_type == BTRFS_DIR_INDEX_KEY)
+		filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
+	else
+		filp->f_pos++;
+nopos:
+	ret = 0;
+err:
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_write_inode(struct inode *inode, int wait)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+
+	if (root->fs_info->closing > 1)
+		return 0;
+
+	if (wait) {
+		trans = btrfs_join_transaction(root, 1);
+		btrfs_set_trans_block_group(trans, inode);
+		ret = btrfs_commit_transaction(trans, root);
+	}
+	return ret;
+}
+
+/*
+ * This is somewhat expensive, updating the tree every time the
+ * inode changes.  But, it is most likely to find the inode in cache.
+ * FIXME, needs more benchmarking...there are no reasons other than performance
+ * to keep or drop this code.
+ */
+void btrfs_dirty_inode(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+
+	trans = btrfs_join_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+	btrfs_update_inode(trans, root, inode);
+	btrfs_end_transaction(trans, root);
+}
+
+/*
+ * find the highest existing sequence number in a directory
+ * and then set the in-memory index_cnt variable to reflect
+ * free sequence numbers
+ */
+static int btrfs_set_inode_index_count(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_key key, found_key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	int ret;
+
+	key.objectid = inode->i_ino;
+	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
+	key.offset = (u64)-1;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	/* FIXME: we should be able to handle this */
+	if (ret == 0)
+		goto out;
+	ret = 0;
+
+	/*
+	 * MAGIC NUMBER EXPLANATION:
+	 * since we search a directory based on f_pos, we have to start at 2:
+	 * '.' and '..' have f_pos of 0 and 1 respectively, so everything
+	 * else has to start at 2
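+	 * (an empty directory therefore hands out index 2 to its first
+	 * entry, 3 to the next, and so on)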
+	 */
+	if (path->slots[0] == 0) {
+		BTRFS_I(inode)->index_cnt = 2;
+		goto out;
+	}
+
+	path->slots[0]--;
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+	if (found_key.objectid != inode->i_ino ||
+	    btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
+		BTRFS_I(inode)->index_cnt = 2;
+		goto out;
+	}
+
+	BTRFS_I(inode)->index_cnt = found_key.offset + 1;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to find a free sequence number in a given directory.  This current
+ * code is very simple, later versions will do smarter things in the btree
+ */
+static int btrfs_set_inode_index(struct inode *dir, struct inode *inode,
+				 u64 *index)
+{
+	int ret = 0;
+
+	if (BTRFS_I(dir)->index_cnt == (u64)-1) {
+		ret = btrfs_set_inode_index_count(dir);
+		if (ret)
+			return ret;
+	}
+
+	*index = BTRFS_I(dir)->index_cnt;
+	BTRFS_I(dir)->index_cnt++;
+
+	return ret;
+}
+
+static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct inode *dir,
+				     const char *name, int name_len,
+				     u64 ref_objectid,
+				     u64 objectid,
+				     struct btrfs_block_group_cache *group,
+				     int mode, u64 *index)
+{
+	struct inode *inode;
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_block_group_cache *new_inode_group;
+	struct btrfs_key *location;
+	struct btrfs_path *path;
+	struct btrfs_inode_ref *ref;
+	struct btrfs_key key[2];
+	u32 sizes[2];
+	unsigned long ptr;
+	int ret;
+	int owner;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	inode = new_inode(root->fs_info->sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	if (dir) {
+		ret = btrfs_set_inode_index(dir, inode, index);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+	/*
+	 * index_cnt is ignored for everything but a dir,
+	 * btrfs_set_inode_index_count has an explanation for the magic
+	 * number
+	 */
+	init_btrfs_i(inode);
+	BTRFS_I(inode)->index_cnt = 2;
+	BTRFS_I(inode)->root = root;
+	BTRFS_I(inode)->generation = trans->transid;
+
+	if (mode & S_IFDIR)
+		owner = 0;
+	else
+		owner = 1;
+	new_inode_group = btrfs_find_block_group(root, group, 0,
+				       BTRFS_BLOCK_GROUP_METADATA, owner);
+	if (!new_inode_group) {
+		printk("btrfs_find_block_group failed\n");
+		new_inode_group = group;
+	}
+	BTRFS_I(inode)->block_group = new_inode_group;
+
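+	/*
+	 * the inode item and its INODE_REF back reference are inserted
+	 * into the leaf in one batched call below
+	 */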
+	key[0].objectid = objectid;
+	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
+	key[0].offset = 0;
+
+	key[1].objectid = objectid;
+	btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
+	key[1].offset = ref_objectid;
+
+	sizes[0] = sizeof(struct btrfs_inode_item);
+	sizes[1] = name_len + sizeof(*ref);
+
+	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
+	if (ret != 0)
+		goto fail;
+
+	if (objectid > root->highest_inode)
+		root->highest_inode = objectid;
+
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_mode = mode;
+	inode->i_ino = objectid;
+	inode_set_bytes(inode, 0);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				  struct btrfs_inode_item);
+	fill_inode_item(trans, path->nodes[0], inode_item, inode);
+
+	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
+			     struct btrfs_inode_ref);
+	btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+	btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
+	ptr = (unsigned long)(ref + 1);
+	write_extent_buffer(path->nodes[0], name, ptr, name_len);
+
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	btrfs_free_path(path);
+
+	location = &BTRFS_I(inode)->location;
+	location->objectid = objectid;
+	location->offset = 0;
+	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
+
+	insert_inode_hash(inode);
+	return inode;
+fail:
+	if (dir)
+		BTRFS_I(dir)->index_cnt--;
+	btrfs_free_path(path);
+	return ERR_PTR(ret);
+}
+
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+	return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
+}
+
+/*
+ * utility function to add 'inode' into 'parent_inode' with
+ * a given name and a given sequence number.
+ * if 'add_backref' is true, also insert a backref from the
+ * inode to the parent directory.
+ */
+int btrfs_add_link(struct btrfs_trans_handle *trans,
+		   struct inode *parent_inode, struct inode *inode,
+		   const char *name, int name_len, int add_backref, u64 index)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_root *root = BTRFS_I(parent_inode)->root;
+
+	key.objectid = inode->i_ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+
+	ret = btrfs_insert_dir_item(trans, root, name, name_len,
+				    parent_inode->i_ino,
+				    &key, btrfs_inode_type(inode),
+				    index);
+	if (ret == 0) {
+		if (add_backref) {
+			ret = btrfs_insert_inode_ref(trans, root,
+						     name, name_len,
+						     inode->i_ino,
+						     parent_inode->i_ino,
+						     index);
+		}
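+		/*
+		 * each name is stored as both a dir item and a dir index
+		 * item, so the directory size accounts for it twice
+		 */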
+		btrfs_i_size_write(parent_inode, parent_inode->i_size +
+				   name_len * 2);
+		parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+		ret = btrfs_update_inode(trans, root, parent_inode);
+	}
+	return ret;
+}
+
+static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
+			    struct dentry *dentry, struct inode *inode,
+			    int backref, u64 index)
+{
+	int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
+				 inode, dentry->d_name.name,
+				 dentry->d_name.len, backref, index);
+	if (!err) {
+		d_instantiate(dentry, inode);
+		return 0;
+	}
+	if (err > 0)
+		err = -EEXIST;
+	return err;
+}
+
+static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
+			int mode, dev_t rdev)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct inode *inode = NULL;
+	int err;
+	int drop_inode = 0;
+	u64 objectid;
+	unsigned long nr = 0;
+	u64 index = 0;
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto fail;
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, dir);
+
+	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
+	if (err) {
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+
+	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
+				dentry->d_name.len,
+				dentry->d_parent->d_inode->i_ino, objectid,
+				BTRFS_I(dir)->block_group, mode, &index);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_unlock;
+
+	err = btrfs_init_acl(inode, dir);
+	if (err) {
+		drop_inode = 1;
+		goto out_unlock;
+	}
+
+	btrfs_set_trans_block_group(trans, inode);
+	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	if (err)
+		drop_inode = 1;
+	else {
+		inode->i_op = &btrfs_special_inode_operations;
+		init_special_inode(inode, inode->i_mode, rdev);
+		btrfs_update_inode(trans, root, inode);
+	}
+	dir->i_sb->s_dirt = 1;
+	btrfs_update_inode_block_group(trans, inode);
+	btrfs_update_inode_block_group(trans, dir);
+out_unlock:
+	nr = trans->blocks_used;
+	btrfs_end_transaction_throttle(trans, root);
+fail:
+	if (drop_inode) {
+		inode_dec_link_count(inode);
+		iput(inode);
+	}
+	btrfs_btree_balance_dirty(root, nr);
+	return err;
+}
+
+static int btrfs_create(struct inode *dir, struct dentry *dentry,
+			int mode, struct nameidata *nd)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct inode *inode = NULL;
+	int err;
+	int drop_inode = 0;
+	unsigned long nr = 0;
+	u64 objectid;
+	u64 index = 0;
+
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto fail;
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, dir);
+
+	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
+	if (err) {
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+
+	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
+				dentry->d_name.len,
+				dentry->d_parent->d_inode->i_ino,
+				objectid, BTRFS_I(dir)->block_group, mode,
+				&index);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_unlock;
+
+	err = btrfs_init_acl(inode, dir);
+	if (err) {
+		drop_inode = 1;
+		goto out_unlock;
+	}
+
+	btrfs_set_trans_block_group(trans, inode);
+	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	if (err)
+		drop_inode = 1;
+	else {
+		inode->i_mapping->a_ops = &btrfs_aops;
+		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+		inode->i_fop = &btrfs_file_operations;
+		inode->i_op = &btrfs_file_inode_operations;
+		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+	}
+	dir->i_sb->s_dirt = 1;
+	btrfs_update_inode_block_group(trans, inode);
+	btrfs_update_inode_block_group(trans, dir);
+out_unlock:
+	nr = trans->blocks_used;
+	btrfs_end_transaction_throttle(trans, root);
+fail:
+	if (drop_inode) {
+		inode_dec_link_count(inode);
+		iput(inode);
+	}
+	btrfs_btree_balance_dirty(root, nr);
+	return err;
+}
+
+static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+		      struct dentry *dentry)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct inode *inode = old_dentry->d_inode;
+	u64 index;
+	unsigned long nr = 0;
+	int err;
+	int drop_inode = 0;
+
+	if (inode->i_nlink == 0)
+		return -ENOENT;
+
+	btrfs_inc_nlink(inode);
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto fail;
+	err = btrfs_set_inode_index(dir, inode, &index);
+	if (err)
+		goto fail;
+
+	trans = btrfs_start_transaction(root, 1);
+
+	btrfs_set_trans_block_group(trans, dir);
+	atomic_inc(&inode->i_count);
+
+	err = btrfs_add_nondir(trans, dentry, inode, 1, index);
+
+	if (err)
+		drop_inode = 1;
+
+	dir->i_sb->s_dirt = 1;
+	btrfs_update_inode_block_group(trans, dir);
+	err = btrfs_update_inode(trans, root, inode);
+
+	if (err)
+		drop_inode = 1;
+
+	nr = trans->blocks_used;
+	btrfs_end_transaction_throttle(trans, root);
+fail:
+	if (drop_inode) {
+		inode_dec_link_count(inode);
+		iput(inode);
+	}
+	btrfs_btree_balance_dirty(root, nr);
+	return err;
+}
+
+static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *inode = NULL;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	int err = 0;
+	int drop_on_err = 0;
+	u64 objectid = 0;
+	u64 index = 0;
+	unsigned long nr = 1;
+
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto out_unlock;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);
+		goto out_unlock;
+	}
+	btrfs_set_trans_block_group(trans, dir);
+
+	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
+	if (err) {
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+
+	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
+				dentry->d_name.len,
+				dentry->d_parent->d_inode->i_ino, objectid,
+				BTRFS_I(dir)->block_group, S_IFDIR | mode,
+				&index);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_fail;
+	}
+
+	drop_on_err = 1;
+
+	err = btrfs_init_acl(inode, dir);
+	if (err)
+		goto out_fail;
+
+	inode->i_op = &btrfs_dir_inode_operations;
+	inode->i_fop = &btrfs_dir_file_operations;
+	btrfs_set_trans_block_group(trans, inode);
+
+	btrfs_i_size_write(inode, 0);
+	err = btrfs_update_inode(trans, root, inode);
+	if (err)
+		goto out_fail;
+
+	err = btrfs_add_link(trans, dentry->d_parent->d_inode,
+				 inode, dentry->d_name.name,
+				 dentry->d_name.len, 0, index);
+	if (err)
+		goto out_fail;
+
+	d_instantiate(dentry, inode);
+	drop_on_err = 0;
+	dir->i_sb->s_dirt = 1;
+	btrfs_update_inode_block_group(trans, inode);
+	btrfs_update_inode_block_group(trans, dir);
+
+out_fail:
+	nr = trans->blocks_used;
+	btrfs_end_transaction_throttle(trans, root);
+
+out_unlock:
+	if (drop_on_err)
+		iput(inode);
+	btrfs_btree_balance_dirty(root, nr);
+	return err;
+}
+
+/* helper for btrfs_get_extent.  Given an existing extent in the tree,
+ * and an extent that you want to insert, deal with overlap and insert
+ * the new extent into the tree.
+ */
+static int merge_extent_mapping(struct extent_map_tree *em_tree,
+				struct extent_map *existing,
+				struct extent_map *em,
+				u64 map_start, u64 map_len)
+{
+	u64 start_diff;
+
+	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
+	start_diff = map_start - em->start;
+	em->start = map_start;
+	em->len = map_len;
+	if (em->block_start < EXTENT_MAP_LAST_BYTE)
+		em->block_start += start_diff;
+	return add_extent_mapping(em_tree, em);
+}
+
+/*
+ * a bit scary, this does extent mapping from logical file offset to the disk.
+ * the ugly parts come from merging extents from the disk with the
+ * in-ram representation.  This gets more complex because of the data=ordered code,
+ * where the in-ram extents might be locked pending data=ordered completion.
+ *
+ * This also copies inline extents directly into the page.
+ */
+struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
+				    size_t pg_offset, u64 start, u64 len,
+				    int create)
+{
+	int ret;
+	int err = 0;
+	u64 bytenr;
+	u64 extent_start = 0;
+	u64 extent_end = 0;
+	u64 objectid = inode->i_ino;
+	u32 found_type;
+	struct btrfs_path *path = NULL;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_file_extent_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_key found_key;
+	struct extent_map *em = NULL;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_trans_handle *trans = NULL;
+
+again:
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, start, len);
+	if (em)
+		em->bdev = root->fs_info->fs_devices->latest_bdev;
+	spin_unlock(&em_tree->lock);
+
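+	/*
+	 * only reuse a cached extent map if it actually covers 'start'.
+	 * inline extents are re-read when a page is supplied so the data
+	 * can be copied into it
+	 */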
+	if (em) {
+		if (em->start > start || em->start + em->len <= start)
+			free_extent_map(em);
+		else if (em->block_start == EXTENT_MAP_INLINE && page)
+			free_extent_map(em);
+		else
+			goto out;
+	}
+	em = alloc_extent_map(GFP_NOFS);
+	if (!em) {
+		err = -ENOMEM;
+		goto out;
+	}
+	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	em->start = EXTENT_MAP_HOLE;
+	em->len = (u64)-1;
+
+	if (!path) {
+		path = btrfs_alloc_path();
+		BUG_ON(!path);
+	}
+
+	ret = btrfs_lookup_file_extent(trans, root, path,
+				       objectid, start, trans != NULL);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
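+	/*
+	 * a positive return means there was no exact match; back up one
+	 * slot to the previous extent item, which may still cover 'start'
+	 */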
+	if (ret != 0) {
+		if (path->slots[0] == 0)
+			goto not_found;
+		path->slots[0]--;
+	}
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+			      struct btrfs_file_extent_item);
+	/* are we inside the extent that was found? */
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	found_type = btrfs_key_type(&found_key);
+	if (found_key.objectid != objectid ||
+	    found_type != BTRFS_EXTENT_DATA_KEY) {
+		goto not_found;
+	}
+
+	found_type = btrfs_file_extent_type(leaf, item);
+	extent_start = found_key.offset;
+	if (found_type == BTRFS_FILE_EXTENT_REG) {
+		extent_end = extent_start +
+		       btrfs_file_extent_num_bytes(leaf, item);
+		err = 0;
+		if (start < extent_start || start >= extent_end) {
+			em->start = start;
+			if (start < extent_start) {
+				if (start + len <= extent_start)
+					goto not_found;
+				em->len = extent_end - extent_start;
+			} else {
+				em->len = len;
+			}
+			goto not_found_em;
+		}
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
+		if (bytenr == 0) {
+			em->start = extent_start;
+			em->len = extent_end - extent_start;
+			em->block_start = EXTENT_MAP_HOLE;
+			goto insert;
+		}
+		bytenr += btrfs_file_extent_offset(leaf, item);
+		em->block_start = bytenr;
+		em->start = extent_start;
+		em->len = extent_end - extent_start;
+		goto insert;
+	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+		u64 page_start;
+		unsigned long ptr;
+		char *map;
+		size_t size;
+		size_t extent_offset;
+		size_t copy_size;
+
+		size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
+						    path->slots[0]));
+		extent_end = (extent_start + size + root->sectorsize - 1) &
+			~((u64)root->sectorsize - 1);
+		if (start < extent_start || start >= extent_end) {
+			em->start = start;
+			if (start < extent_start) {
+				if (start + len <= extent_start)
+					goto not_found;
+				em->len = extent_end - extent_start;
+			} else {
+				em->len = len;
+			}
+			goto not_found_em;
+		}
+		em->block_start = EXTENT_MAP_INLINE;
+
+		if (!page) {
+			em->start = extent_start;
+			em->len = size;
+			goto out;
+		}
+
+		page_start = page_offset(page) + pg_offset;
+		extent_offset = page_start - extent_start;
+		copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
+				size - extent_offset);
+		em->start = extent_start + extent_offset;
+		em->len = (copy_size + root->sectorsize - 1) &
+			~((u64)root->sectorsize - 1);
+		map = kmap(page);
+		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
+		if (create == 0 && !PageUptodate(page)) {
+			read_extent_buffer(leaf, map + pg_offset, ptr,
+					   copy_size);
+			flush_dcache_page(page);
+		} else if (create && PageUptodate(page)) {
+			if (!trans) {
+				kunmap(page);
+				free_extent_map(em);
+				em = NULL;
+				btrfs_release_path(root, path);
+				trans = btrfs_join_transaction(root, 1);
+				goto again;
+			}
+			write_extent_buffer(leaf, map + pg_offset, ptr,
+					    copy_size);
+			btrfs_mark_buffer_dirty(leaf);
+		}
+		kunmap(page);
+		set_extent_uptodate(io_tree, em->start,
+				    extent_map_end(em) - 1, GFP_NOFS);
+		goto insert;
+	} else {
+		printk("unknown found_type %d\n", found_type);
+		WARN_ON(1);
+	}
+not_found:
+	em->start = start;
+	em->len = len;
+not_found_em:
+	em->block_start = EXTENT_MAP_HOLE;
+insert:
+	btrfs_release_path(root, path);
+	if (em->start > start || extent_map_end(em) <= start) {
+		printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n",
+		       em->start, em->len, start, len);
+		err = -EIO;
+		goto out;
+	}
+
+	err = 0;
+	spin_lock(&em_tree->lock);
+	ret = add_extent_mapping(em_tree, em);
+	/* it is possible that someone inserted the extent into the tree
+	 * while we had the lock dropped.  It is also possible that
+	 * an overlapping map exists in the tree
+	 */
+	if (ret == -EEXIST) {
+		struct extent_map *existing;
+
+		ret = 0;
+
+		existing = lookup_extent_mapping(em_tree, start, len);
+		if (existing && (existing->start > start ||
+		    existing->start + existing->len <= start)) {
+			free_extent_map(existing);
+			existing = NULL;
+		}
+		if (!existing) {
+			existing = lookup_extent_mapping(em_tree, em->start,
+							 em->len);
+			if (existing) {
+				err = merge_extent_mapping(em_tree, existing,
+							   em, start,
+							   root->sectorsize);
+				free_extent_map(existing);
+				if (err) {
+					free_extent_map(em);
+					em = NULL;
+				}
+			} else {
+				err = -EIO;
+				printk("failing to insert %Lu %Lu\n",
+				       start, len);
+				free_extent_map(em);
+				em = NULL;
+			}
+		} else {
+			free_extent_map(em);
+			em = existing;
+			err = 0;
+		}
+	}
+	spin_unlock(&em_tree->lock);
+out:
+	if (path)
+		btrfs_free_path(path);
+	if (trans) {
+		ret = btrfs_end_transaction(trans, root);
+		if (!err)
+			err = ret;
+	}
+	if (err) {
+		free_extent_map(em);
+		WARN_ON(1);
+		return ERR_PTR(err);
+	}
+	return em;
+}
+
+static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
+			const struct iovec *iov, loff_t offset,
+			unsigned long nr_segs)
+{
+	return -EINVAL;
+}
+
+static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
+{
+	return extent_bmap(mapping, iblock, btrfs_get_extent);
+}
+
+int btrfs_readpage(struct file *file, struct page *page)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	return extent_read_full_page(tree, page, btrfs_get_extent);
+}
+
+static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct extent_io_tree *tree;
+
+	if (current->flags & PF_MEMALLOC) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
+}
+
+int btrfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(mapping->host)->io_tree;
+	return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
+}
+
+static int
+btrfs_readpages(struct file *file, struct address_space *mapping,
+		struct list_head *pages, unsigned nr_pages)
+{
+	struct extent_io_tree *tree;
+	tree = &BTRFS_I(mapping->host)->io_tree;
+	return extent_readpages(tree, mapping, pages, nr_pages,
+				btrfs_get_extent);
+}
+
+static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
+{
+	struct extent_io_tree *tree;
+	struct extent_map_tree *map;
+	int ret;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	map = &BTRFS_I(page->mapping->host)->extent_tree;
+	ret = try_release_extent_mapping(map, tree, page, gfp_flags);
+	if (ret == 1) {
+		ClearPagePrivate(page);
+		set_page_private(page, 0);
+		page_cache_release(page);
+	}
+	return ret;
+}
+
+static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
+{
+	if (PageWriteback(page) || PageDirty(page))
+		return 0;
+	return __btrfs_releasepage(page, gfp_flags);
+}
+
+static void btrfs_invalidatepage(struct page *page, unsigned long offset)
+{
+	struct extent_io_tree *tree;
+	struct btrfs_ordered_extent *ordered;
+	u64 page_start = page_offset(page);
+	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+	wait_on_page_writeback(page);
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	if (offset) {
+		btrfs_releasepage(page, GFP_NOFS);
+		return;
+	}
+
+	lock_extent(tree, page_start, page_end, GFP_NOFS);
+	ordered = btrfs_lookup_ordered_extent(page->mapping->host,
+					   page_offset(page));
+	if (ordered) {
+		/*
+		 * IO on this page will never be started, so we need
+		 * to account for any ordered extents now
+		 */
+		clear_extent_bit(tree, page_start, page_end,
+				 EXTENT_DIRTY | EXTENT_DELALLOC |
+				 EXTENT_LOCKED, 1, 0, GFP_NOFS);
+		btrfs_finish_ordered_io(page->mapping->host,
+					page_start, page_end);
+		btrfs_put_ordered_extent(ordered);
+		lock_extent(tree, page_start, page_end, GFP_NOFS);
+	}
+	clear_extent_bit(tree, page_start, page_end,
+		 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+		 EXTENT_ORDERED,
+		 1, 1, GFP_NOFS);
+	__btrfs_releasepage(page, GFP_NOFS);
+
+	ClearPageChecked(page);
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		set_page_private(page, 0);
+		page_cache_release(page);
+	}
+}
+
+/*
+ * btrfs_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct inode *inode = fdentry(vma->vm_file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_ordered_extent *ordered;
+	char *kaddr;
+	unsigned long zero_start;
+	loff_t size;
+	int ret;
+	u64 page_start;
+	u64 page_end;
+
+	ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;
+again:
+	lock_page(page);
+	size = i_size_read(inode);
+	page_start = page_offset(page);
+	page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+	if ((page->mapping != inode->i_mapping) ||
+	    (page_start >= size)) {
+		/* page got truncated out from underneath us */
+		goto out_unlock;
+	}
+	wait_on_page_writeback(page);
+
+	lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+	set_page_extent_mapped(page);
+
+	/*
+	 * we can't set the delalloc bits if there are pending ordered
+	 * extents.  Drop our locks and wait for them to finish
+	 */
+	ordered = btrfs_lookup_ordered_extent(inode, page_start);
+	if (ordered) {
+		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_page(page);
+		btrfs_start_ordered_extent(inode, ordered, 1);
+		btrfs_put_ordered_extent(ordered);
+		goto again;
+	}
+
+	btrfs_set_extent_delalloc(inode, page_start, page_end);
+	ret = 0;
+
+	/* page is wholly or partially inside EOF */
+	if (page_start + PAGE_CACHE_SIZE > size)
+		zero_start = size & ~PAGE_CACHE_MASK;
+	else
+		zero_start = PAGE_CACHE_SIZE;
+
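+	/* zero the part of the page beyond EOF so stale data is not exposed */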
+	if (zero_start != PAGE_CACHE_SIZE) {
+		kaddr = kmap(page);
+		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
+		flush_dcache_page(page);
+		kunmap(page);
+	}
+	ClearPageChecked(page);
+	set_page_dirty(page);
+	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+
+out_unlock:
+	unlock_page(page);
+out:
+	return ret;
+}
+
+static void btrfs_truncate(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+	struct btrfs_trans_handle *trans;
+	unsigned long nr;
+	u64 mask = root->sectorsize - 1;
+
+	if (!S_ISREG(inode->i_mode))
+		return;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+
+	btrfs_truncate_page(inode->i_mapping, inode->i_size);
+	btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+	btrfs_i_size_write(inode, inode->i_size);
+
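+	/*
+	 * add an orphan item so a crash in the middle of the truncate can
+	 * be cleaned up on the next mount; it is removed again below once
+	 * the items are gone
+	 */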
+	ret = btrfs_orphan_add(trans, inode);
+	if (ret)
+		goto out;
+	/* FIXME, add redo link to tree so we don't leak on crash */
+	ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
+				      BTRFS_EXTENT_DATA_KEY);
+	btrfs_update_inode(trans, root, inode);
+
+	ret = btrfs_orphan_del(trans, inode);
+	BUG_ON(ret);
+
+out:
+	nr = trans->blocks_used;
+	ret = btrfs_end_transaction_throttle(trans, root);
+	BUG_ON(ret);
+	btrfs_btree_balance_dirty(root, nr);
+}
+
+/*
+ * Invalidate a single dcache entry at the root of the filesystem.
+ * Needed after creation of snapshot or subvolume.
+ */
+void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
+				  int namelen)
+{
+	struct dentry *alias, *entry;
+	struct qstr qstr;
+
+	alias = d_find_alias(root->fs_info->sb->s_root->d_inode);
+	if (alias) {
+		qstr.name = name;
+		qstr.len = namelen;
+		/* change me if btrfs ever gets a d_hash operation */
+		qstr.hash = full_name_hash(qstr.name, qstr.len);
+		entry = d_lookup(alias, &qstr);
+		dput(alias);
+		if (entry) {
+			d_invalidate(entry);
+			dput(entry);
+		}
+	}
+}
+
+/*
+ * create a new subvolume directory/inode (helper for the ioctl).
+ */
+int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
+		struct btrfs_trans_handle *trans, u64 new_dirid,
+		struct btrfs_block_group_cache *block_group)
+{
+	struct inode *inode;
+	int error;
+	u64 index = 0;
+
+	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
+				new_dirid, block_group, S_IFDIR | 0700, &index);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	inode->i_op = &btrfs_dir_inode_operations;
+	inode->i_fop = &btrfs_dir_file_operations;
+	new_root->inode = inode;
+
+	inode->i_nlink = 1;
+	btrfs_i_size_write(inode, 0);
+
+	error = btrfs_update_inode(trans, new_root, inode);
+	if (error)
+		return error;
+
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+/* helper function for file defrag and space balancing.  This
+ * forces readahead on a given range of bytes in an inode
+ */
+unsigned long btrfs_force_ra(struct address_space *mapping,
+			      struct file_ra_state *ra, struct file *file,
+			      pgoff_t offset, pgoff_t last_index)
+{
+	pgoff_t req_size = last_index - offset + 1;
+
+	page_cache_sync_readahead(mapping, ra, file, offset, req_size);
+	return offset + req_size;
+}
+
+struct inode *btrfs_alloc_inode(struct super_block *sb)
+{
+	struct btrfs_inode *ei;
+
+	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
+	if (!ei)
+		return NULL;
+	ei->last_trans = 0;
+	ei->logged_trans = 0;
+	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
+	ei->i_acl = BTRFS_ACL_NOT_CACHED;
+	ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
+	INIT_LIST_HEAD(&ei->i_orphan);
+	return &ei->vfs_inode;
+}
+
+void btrfs_destroy_inode(struct inode *inode)
+{
+	struct btrfs_ordered_extent *ordered;
+	WARN_ON(!list_empty(&inode->i_dentry));
+	WARN_ON(inode->i_data.nrpages);
+
+	if (BTRFS_I(inode)->i_acl &&
+	    BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
+		posix_acl_release(BTRFS_I(inode)->i_acl);
+	if (BTRFS_I(inode)->i_default_acl &&
+	    BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
+		posix_acl_release(BTRFS_I(inode)->i_default_acl);
+
+	spin_lock(&BTRFS_I(inode)->root->list_lock);
+	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+		printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
+		       " list\n", inode->i_ino);
+		dump_stack();
+	}
+	spin_unlock(&BTRFS_I(inode)->root->list_lock);
+
+	while (1) {
+		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
+		if (!ordered)
+			break;
+		else {
+			printk("found ordered extent %Lu %Lu\n",
+			       ordered->file_offset, ordered->len);
+			btrfs_remove_ordered_extent(inode, ordered);
+			btrfs_put_ordered_extent(ordered);
+			btrfs_put_ordered_extent(ordered);
+		}
+	}
+	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
+static void init_once(void *foo)
+{
+	struct btrfs_inode *ei = (struct btrfs_inode *) foo;
+
+	inode_init_once(&ei->vfs_inode);
+}
+
+void btrfs_destroy_cachep(void)
+{
+	if (btrfs_inode_cachep)
+		kmem_cache_destroy(btrfs_inode_cachep);
+	if (btrfs_trans_handle_cachep)
+		kmem_cache_destroy(btrfs_trans_handle_cachep);
+	if (btrfs_transaction_cachep)
+		kmem_cache_destroy(btrfs_transaction_cachep);
+	if (btrfs_bit_radix_cachep)
+		kmem_cache_destroy(btrfs_bit_radix_cachep);
+	if (btrfs_path_cachep)
+		kmem_cache_destroy(btrfs_path_cachep);
+}
+
+struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+				       unsigned long extra_flags,
+				       void (*ctor)(void *))
+{
+	return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
+				 SLAB_MEM_SPREAD | extra_flags), ctor);
+}
+
+int btrfs_init_cachep(void)
+{
+	btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
+					  sizeof(struct btrfs_inode),
+					  0, init_once);
+	if (!btrfs_inode_cachep)
+		goto fail;
+	btrfs_trans_handle_cachep =
+			btrfs_cache_create("btrfs_trans_handle_cache",
+					   sizeof(struct btrfs_trans_handle),
+					   0, NULL);
+	if (!btrfs_trans_handle_cachep)
+		goto fail;
+	btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
+					     sizeof(struct btrfs_transaction),
+					     0, NULL);
+	if (!btrfs_transaction_cachep)
+		goto fail;
+	btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
+					 sizeof(struct btrfs_path),
+					 0, NULL);
+	if (!btrfs_path_cachep)
+		goto fail;
+	btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
+					      SLAB_DESTROY_BY_RCU, NULL);
+	if (!btrfs_bit_radix_cachep)
+		goto fail;
+	return 0;
+fail:
+	btrfs_destroy_cachep();
+	return -ENOMEM;
+}
+
+static int btrfs_getattr(struct vfsmount *mnt,
+			 struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	generic_fillattr(inode, stat);
+	stat->blksize = PAGE_CACHE_SIZE;
+	stat->blocks = (inode_get_bytes(inode) +
+			BTRFS_I(inode)->delalloc_bytes) >> 9;
+	return 0;
+}
+
+static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			   struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(old_dir)->root;
+	struct inode *new_inode = new_dentry->d_inode;
+	struct inode *old_inode = old_dentry->d_inode;
+	struct timespec ctime = CURRENT_TIME;
+	u64 index = 0;
+	int ret;
+
+	if (S_ISDIR(old_inode->i_mode) && new_inode &&
+	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
+		return -ENOTEMPTY;
+	}
+
+	ret = btrfs_check_free_space(root, 1, 0);
+	if (ret)
+		goto out_unlock;
+
+	trans = btrfs_start_transaction(root, 1);
+
+	btrfs_set_trans_block_group(trans, new_dir);
+
+	btrfs_inc_nlink(old_dentry->d_inode);
+	old_dir->i_ctime = old_dir->i_mtime = ctime;
+	new_dir->i_ctime = new_dir->i_mtime = ctime;
+	old_inode->i_ctime = ctime;
+
+	ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
+				 old_dentry->d_name.name,
+				 old_dentry->d_name.len);
+	if (ret)
+		goto out_fail;
+
+	if (new_inode) {
+		new_inode->i_ctime = CURRENT_TIME;
+		ret = btrfs_unlink_inode(trans, root, new_dir,
+					 new_dentry->d_inode,
+					 new_dentry->d_name.name,
+					 new_dentry->d_name.len);
+		if (ret)
+			goto out_fail;
+		if (new_inode->i_nlink == 0) {
+			ret = btrfs_orphan_add(trans, new_dentry->d_inode);
+			if (ret)
+				goto out_fail;
+		}
+
+	}
+	ret = btrfs_set_inode_index(new_dir, old_inode, &index);
+	if (ret)
+		goto out_fail;
+
+	ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
+			     old_inode, new_dentry->d_name.name,
+			     new_dentry->d_name.len, 1, index);
+	if (ret)
+		goto out_fail;
+
+out_fail:
+	btrfs_end_transaction_throttle(trans, root);
+out_unlock:
+	return ret;
+}
+
+/*
+ * some fairly slow code that needs optimization. This walks the list
+ * of all the inodes with pending delalloc and forces them to disk.
+ */
+int btrfs_start_delalloc_inodes(struct btrfs_root *root)
+{
+	struct list_head *head = &root->fs_info->delalloc_inodes;
+	struct btrfs_inode *binode;
+	struct inode *inode;
+	unsigned long flags;
+
+	spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
+	while (!list_empty(head)) {
+		binode = list_entry(head->next, struct btrfs_inode,
+				    delalloc_inodes);
+		inode = igrab(&binode->vfs_inode);
+		if (!inode)
+			list_del_init(&binode->delalloc_inodes);
+		spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
+		if (inode) {
+			filemap_flush(inode->i_mapping);
+			iput(inode);
+		}
+		cond_resched();
+		spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
+	}
+	spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
+
+	/* the filemap_flush will queue IO into the worker threads, but
+	 * we have to make sure the IO is actually started and that
+	 * ordered extents get created before we return
+	 */
+	atomic_inc(&root->fs_info->async_submit_draining);
+	while (atomic_read(&root->fs_info->nr_async_submits)) {
+		wait_event(root->fs_info->async_submit_wait,
+		   (atomic_read(&root->fs_info->nr_async_submits) == 0));
+	}
+	atomic_dec(&root->fs_info->async_submit_draining);
+	return 0;
+}
+
+static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
+			 const char *symname)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct inode *inode = NULL;
+	int err;
+	int drop_inode = 0;
+	u64 objectid;
+	u64 index = 0;
+	int name_len;
+	int datasize;
+	unsigned long ptr;
+	struct btrfs_file_extent_item *ei;
+	struct extent_buffer *leaf;
+	unsigned long nr = 0;
+
+	name_len = strlen(symname) + 1;
+	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
+		return -ENAMETOOLONG;
+
+	err = btrfs_check_free_space(root, 1, 0);
+	if (err)
+		goto out_fail;
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, dir);
+
+	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
+	if (err) {
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+
+	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
+				dentry->d_name.len,
+				dentry->d_parent->d_inode->i_ino, objectid,
+				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
+				&index);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_unlock;
+
+	err = btrfs_init_acl(inode, dir);
+	if (err) {
+		drop_inode = 1;
+		goto out_unlock;
+	}
+
+	btrfs_set_trans_block_group(trans, inode);
+	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	if (err)
+		drop_inode = 1;
+	else {
+		inode->i_mapping->a_ops = &btrfs_aops;
+		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+		inode->i_fop = &btrfs_file_operations;
+		inode->i_op = &btrfs_file_inode_operations;
+		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+	}
+	dir->i_sb->s_dirt = 1;
+	btrfs_update_inode_block_group(trans, inode);
+	btrfs_update_inode_block_group(trans, dir);
+	if (drop_inode)
+		goto out_unlock;
+
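+	/* the symlink target is stored as an inline file extent at offset 0 */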
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	key.objectid = inode->i_ino;
+	key.offset = 0;
+	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+	datasize = btrfs_file_extent_calc_inline_size(name_len);
+	err = btrfs_insert_empty_item(trans, root, path, &key,
+				      datasize);
+	if (err) {
+		drop_inode = 1;
+		goto out_unlock;
+	}
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
+	btrfs_set_file_extent_type(leaf, ei,
+				   BTRFS_FILE_EXTENT_INLINE);
+	ptr = btrfs_file_extent_inline_start(ei);
+	write_extent_buffer(leaf, symname, ptr, name_len);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+
+	inode->i_op = &btrfs_symlink_inode_operations;
+	inode->i_mapping->a_ops = &btrfs_symlink_aops;
+	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+	btrfs_i_size_write(inode, name_len - 1);
+	err = btrfs_update_inode(trans, root, inode);
+	if (err)
+		drop_inode = 1;
+
+out_unlock:
+	nr = trans->blocks_used;
+	btrfs_end_transaction_throttle(trans, root);
+out_fail:
+	if (drop_inode) {
+		inode_dec_link_count(inode);
+		iput(inode);
+	}
+	btrfs_btree_balance_dirty(root, nr);
+	return err;
+}
+
+static int btrfs_set_page_dirty(struct page *page)
+{
+	return __set_page_dirty_nobuffers(page);
+}
+
+static int btrfs_permission(struct inode *inode, int mask)
+{
+	if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
+		return -EACCES;
+	return generic_permission(inode, mask, btrfs_check_acl);
+}
+
+static struct inode_operations btrfs_dir_inode_operations = {
+	.lookup		= btrfs_lookup,
+	.create		= btrfs_create,
+	.unlink		= btrfs_unlink,
+	.link		= btrfs_link,
+	.mkdir		= btrfs_mkdir,
+	.rmdir		= btrfs_rmdir,
+	.rename		= btrfs_rename,
+	.symlink	= btrfs_symlink,
+	.setattr	= btrfs_setattr,
+	.mknod		= btrfs_mknod,
+	.setxattr	= btrfs_setxattr,
+	.getxattr	= btrfs_getxattr,
+	.listxattr	= btrfs_listxattr,
+	.removexattr	= btrfs_removexattr,
+	.permission	= btrfs_permission,
+};
+
+static struct inode_operations btrfs_dir_ro_inode_operations = {
+	.lookup		= btrfs_lookup,
+	.permission	= btrfs_permission,
+};
+
+static struct file_operations btrfs_dir_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= btrfs_real_readdir,
+	.unlocked_ioctl	= btrfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= btrfs_ioctl,
+#endif
+	.release        = btrfs_release_file,
+	.fsync		= btrfs_sync_file,
+};
+
+static struct extent_io_ops btrfs_extent_io_ops = {
+	.fill_delalloc = run_delalloc_range,
+	.submit_bio_hook = btrfs_submit_bio_hook,
+	.merge_bio_hook = btrfs_merge_bio_hook,
+	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+	.writepage_end_io_hook = btrfs_writepage_end_io_hook,
+	.writepage_start_hook = btrfs_writepage_start_hook,
+	.readpage_io_failed_hook = btrfs_io_failed_hook,
+	.set_bit_hook = btrfs_set_bit_hook,
+	.clear_bit_hook = btrfs_clear_bit_hook,
+};
+
+static struct address_space_operations btrfs_aops = {
+	.readpage	= btrfs_readpage,
+	.writepage	= btrfs_writepage,
+	.writepages	= btrfs_writepages,
+	.readpages	= btrfs_readpages,
+	.sync_page	= block_sync_page,
+	.bmap		= btrfs_bmap,
+	.direct_IO	= btrfs_direct_IO,
+	.invalidatepage = btrfs_invalidatepage,
+	.releasepage	= btrfs_releasepage,
+	.set_page_dirty	= btrfs_set_page_dirty,
+};
+
+static struct address_space_operations btrfs_symlink_aops = {
+	.readpage	= btrfs_readpage,
+	.writepage	= btrfs_writepage,
+	.invalidatepage = btrfs_invalidatepage,
+	.releasepage	= btrfs_releasepage,
+};
+
+static struct inode_operations btrfs_file_inode_operations = {
+	.truncate	= btrfs_truncate,
+	.getattr	= btrfs_getattr,
+	.setattr	= btrfs_setattr,
+	.setxattr	= btrfs_setxattr,
+	.getxattr	= btrfs_getxattr,
+	.listxattr      = btrfs_listxattr,
+	.removexattr	= btrfs_removexattr,
+	.permission	= btrfs_permission,
+};
+
+static struct inode_operations btrfs_special_inode_operations = {
+	.getattr	= btrfs_getattr,
+	.setattr	= btrfs_setattr,
+	.permission	= btrfs_permission,
+	.setxattr	= btrfs_setxattr,
+	.getxattr	= btrfs_getxattr,
+	.listxattr	= btrfs_listxattr,
+	.removexattr	= btrfs_removexattr,
+};
+
+static struct inode_operations btrfs_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link,
+	.permission	= btrfs_permission,
+};

+ 863 - 0
fs/btrfs/ioctl.c

@@ -0,0 +1,863 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/mount.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include <linux/compat.h>
+#include <linux/bit_spinlock.h>
+#include <linux/security.h>
+#include <linux/version.h>
+#include <linux/xattr.h>
+#include <linux/vmalloc.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "ioctl.h"
+#include "print-tree.h"
+#include "volumes.h"
+#include "locking.h"
+
+static noinline int create_subvol(struct btrfs_root *root,
+				  struct dentry *dentry,
+				  char *name, int namelen)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_root_item root_item;
+	struct btrfs_inode_item *inode_item;
+	struct extent_buffer *leaf;
+	struct btrfs_root *new_root = root;
+	struct inode *dir;
+	int ret;
+	int err;
+	u64 objectid;
+	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
+	unsigned long nr = 1;
+
+	ret = btrfs_check_free_space(root, 1, 0);
+	if (ret)
+		goto fail_commit;
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
+				       0, &objectid);
+	if (ret)
+		goto fail;
+
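+	/*
+	 * allocate an empty leaf that will become the root block of the
+	 * new subvolume's tree
+	 */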
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+				      objectid, trans->transid, 0, 0, 0);
+	if (IS_ERR(leaf)) {
+		ret = PTR_ERR(leaf);
+		goto fail;
+	}
+
+	btrfs_set_header_nritems(leaf, 0);
+	btrfs_set_header_level(leaf, 0);
+	btrfs_set_header_bytenr(leaf, leaf->start);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_owner(leaf, objectid);
+
+	write_extent_buffer(leaf, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(leaf),
+			    BTRFS_FSID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+
+	inode_item = &root_item.inode;
+	memset(inode_item, 0, sizeof(*inode_item));
+	inode_item->generation = cpu_to_le64(1);
+	inode_item->size = cpu_to_le64(3);
+	inode_item->nlink = cpu_to_le32(1);
+	inode_item->nbytes = cpu_to_le64(root->leafsize);
+	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+
+	btrfs_set_root_bytenr(&root_item, leaf->start);
+	btrfs_set_root_level(&root_item, 0);
+	btrfs_set_root_refs(&root_item, 1);
+	btrfs_set_root_used(&root_item, 0);
+
+	memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
+	root_item.drop_level = 0;
+
+	btrfs_tree_unlock(leaf);
+	free_extent_buffer(leaf);
+	leaf = NULL;
+
+	btrfs_set_root_dirid(&root_item, new_dirid);
+
+	key.objectid = objectid;
+	key.offset = 1;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+				&root_item);
+	if (ret)
+		goto fail;
+
+	/*
+	 * insert the directory item
+	 */
+	key.offset = (u64)-1;
+	dir = root->fs_info->sb->s_root->d_inode;
+	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
+				    name, namelen, dir->i_ino, &key,
+				    BTRFS_FT_DIR, 0);
+	if (ret)
+		goto fail;
+
+	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
+			     name, namelen, objectid,
+			     root->fs_info->sb->s_root->d_inode->i_ino, 0);
+	if (ret)
+		goto fail;
+
+	ret = btrfs_commit_transaction(trans, root);
+	if (ret)
+		goto fail_commit;
+
+	new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
+	BUG_ON(!new_root);
+
+	trans = btrfs_start_transaction(new_root, 1);
+	BUG_ON(!trans);
+
+	ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid,
+				       BTRFS_I(dir)->block_group);
+	if (ret)
+		goto fail;
+
+fail:
+	nr = trans->blocks_used;
+	err = btrfs_commit_transaction(trans, new_root);
+	if (err && !ret)
+		ret = err;
+fail_commit:
+	btrfs_btree_balance_dirty(root, nr);
+	return ret;
+}
+
+static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
+{
+	struct btrfs_pending_snapshot *pending_snapshot;
+	struct btrfs_trans_handle *trans;
+	int ret;
+	int err;
+	unsigned long nr = 0;
+
+	if (!root->ref_cows)
+		return -EINVAL;
+
+	ret = btrfs_check_free_space(root, 1, 0);
+	if (ret)
+		goto fail_unlock;
+
+	pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
+	if (!pending_snapshot) {
+		ret = -ENOMEM;
+		goto fail_unlock;
+	}
+	pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
+	if (!pending_snapshot->name) {
+		ret = -ENOMEM;
+		kfree(pending_snapshot);
+		goto fail_unlock;
+	}
+	memcpy(pending_snapshot->name, name, namelen);
+	pending_snapshot->name[namelen] = '\0';
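+	/*
+	 * the snapshot itself is created when this transaction commits;
+	 * here we only queue the request on the pending_snapshots list
+	 */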
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+	pending_snapshot->root = root;
+	list_add(&pending_snapshot->list,
+		 &trans->transaction->pending_snapshots);
+	ret = btrfs_update_inode(trans, root, root->inode);
+	err = btrfs_commit_transaction(trans, root);
+
+fail_unlock:
+	btrfs_btree_balance_dirty(root, nr);
+	return ret;
+}
+
+/* copy of may_create in fs/namei.c */
+static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
+{
+	if (child->d_inode)
+		return -EEXIST;
+	if (IS_DEADDIR(dir))
+		return -ENOENT;
+	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+}
+
+/*
+ * Create a new subvolume below @parent.  This is largely modeled after
+ * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
+ * inside this filesystem so it's quite a bit simpler.
+ */
+static noinline int btrfs_mksubvol(struct path *parent, char *name,
+				   int mode, int namelen)
+{
+	struct dentry *dentry;
+	int error;
+
+	mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+
+	dentry = lookup_one_len(name, parent->dentry, namelen);
+	error = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+
+	error = -EEXIST;
+	if (dentry->d_inode)
+		goto out_dput;
+
+	if (!IS_POSIXACL(parent->dentry->d_inode))
+		mode &= ~current->fs->umask;
+	error = mnt_want_write(parent->mnt);
+	if (error)
+		goto out_dput;
+
+	error = btrfs_may_create(parent->dentry->d_inode, dentry);
+	if (error)
+		goto out_drop_write;
+
+	/*
+	 * Actually perform the low-level subvolume creation after all
+	 * this VFS fuss.
+	 *
+	 * Eventually we want to pass in an inode under which we create this
+	 * subvolume, but for now all are under the filesystem root.
+	 *
+	 * Also we should pass on the mode eventually to allow creating new
+	 * subvolumes with specific mode bits.
+	 */
+	error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry,
+			      name, namelen);
+	if (error)
+		goto out_drop_write;
+
+	fsnotify_mkdir(parent->dentry->d_inode, dentry);
+out_drop_write:
+	mnt_drop_write(parent->mnt);
+out_dput:
+	dput(dentry);
+out_unlock:
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+	return error;
+}
+
+
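+/*
+ * defragment by reading every page of the file and marking it delalloc
+ * and dirty again, so the following writeback can reallocate the extents
+ * as larger contiguous chunks
+ */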
+int btrfs_defrag_file(struct file *file)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_ordered_extent *ordered;
+	struct page *page;
+	unsigned long last_index;
+	unsigned long ra_pages = root->fs_info->bdi.ra_pages;
+	unsigned long total_read = 0;
+	u64 page_start;
+	u64 page_end;
+	unsigned long i;
+	int ret;
+
+	ret = btrfs_check_free_space(root, inode->i_size, 0);
+	if (ret)
+		return -ENOSPC;
+
+	mutex_lock(&inode->i_mutex);
+	last_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	for (i = 0; i <= last_index; i++) {
+		if (total_read % ra_pages == 0) {
+			btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
+				       min(last_index, i + ra_pages - 1));
+		}
+		total_read++;
+again:
+		page = grab_cache_page(inode->i_mapping, i);
+		if (!page)
+			goto out_unlock;
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				goto out_unlock;
+			}
+		}
+
+		wait_on_page_writeback(page);
+
+		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+
+		ordered = btrfs_lookup_ordered_extent(inode, page_start);
+		if (ordered) {
+			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			goto again;
+		}
+		set_page_extent_mapped(page);
+
+		/*
+		 * this makes sure page_mkwrite is called on the
+		 * page if it is dirtied again later
+		 */
+		clear_page_dirty_for_io(page);
+
+		btrfs_set_extent_delalloc(inode, page_start, page_end);
+
+		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
+	}
+
+out_unlock:
+	mutex_unlock(&inode->i_mutex);
+	return 0;
+}
+
+/*
+ * Called inside transaction, so use GFP_NOFS
+ */
+
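+/*
+ * the vol_args name field holds an optional "devid:" prefix followed by
+ * either "max" or a size string; a leading '+' or '-' grows or shrinks
+ * the device relative to its current size
+ */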
+static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
+{
+	u64 new_size;
+	u64 old_size;
+	u64 devid = 1;
+	struct btrfs_ioctl_vol_args *vol_args;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_device *device = NULL;
+	char *sizestr;
+	char *devstr = NULL;
+	int ret = 0;
+	int namelen;
+	int mod = 0;
+
+	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+
+	if (!vol_args)
+		return -ENOMEM;
+
+	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+	namelen = strlen(vol_args->name);
+
+	mutex_lock(&root->fs_info->volume_mutex);
+	sizestr = vol_args->name;
+	devstr = strchr(sizestr, ':');
+	if (devstr) {
+		char *end;
+		sizestr = devstr + 1;
+		*devstr = '\0';
+		devstr = vol_args->name;
+		devid = simple_strtoull(devstr, &end, 10);
+		printk(KERN_INFO "resizing devid %llu\n", devid);
+	}
+	device = btrfs_find_device(root, devid, NULL);
+	if (!device) {
+		printk(KERN_INFO "resizer unable to find device %llu\n", devid);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (!strcmp(sizestr, "max"))
+		new_size = device->bdev->bd_inode->i_size;
+	else {
+		if (sizestr[0] == '-') {
+			mod = -1;
+			sizestr++;
+		} else if (sizestr[0] == '+') {
+			mod = 1;
+			sizestr++;
+		}
+		new_size = btrfs_parse_size(sizestr);
+		if (new_size == 0) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+	}
+
+	old_size = device->total_bytes;
+
+	if (mod < 0) {
+		if (new_size > old_size) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		new_size = old_size - new_size;
+	} else if (mod > 0) {
+		new_size = old_size + new_size;
+	}
+
+	if (new_size < 256 * 1024 * 1024) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (new_size > device->bdev->bd_inode->i_size) {
+		ret = -EFBIG;
+		goto out_unlock;
+	}
+
+	do_div(new_size, root->sectorsize);
+	new_size *= root->sectorsize;
+
+	printk(KERN_INFO "new size for %s is %llu\n",
+		device->name, (unsigned long long)new_size);
+
+	if (new_size > old_size) {
+		trans = btrfs_start_transaction(root, 1);
+		ret = btrfs_grow_device(trans, device, new_size);
+		btrfs_commit_transaction(trans, root);
+	} else {
+		ret = btrfs_shrink_device(device, new_size);
+	}
+
+out_unlock:
+	mutex_unlock(&root->fs_info->volume_mutex);
+out:
+	kfree(vol_args);
+	return ret;
+}
+
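+/*
+ * make sure the requested name does not already exist as a directory
+ * item in the tree root, then either create a new subvolume (when called
+ * on the tree root) or snapshot the current root
+ */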
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+					    void __user *arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_vol_args *vol_args;
+	struct btrfs_dir_item *di;
+	struct btrfs_path *path;
+	u64 root_dirid;
+	int namelen;
+	int ret;
+
+	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+
+	if (!vol_args)
+		return -ENOMEM;
+
+	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+	namelen = strlen(vol_args->name);
+	if (strchr(vol_args->name, '/')) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	root_dirid = root->fs_info->sb->s_root->d_inode->i_ino;
+	di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
+			    path, root_dirid,
+			    vol_args->name, namelen, 0);
+	btrfs_free_path(path);
+
+	if (di && !IS_ERR(di)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if (IS_ERR(di)) {
+		ret = PTR_ERR(di);
+		goto out;
+	}
+
+	if (root == root->fs_info->tree_root) {
+		ret = btrfs_mksubvol(&file->f_path, vol_args->name,
+				     file->f_path.dentry->d_inode->i_mode,
+				     namelen);
+	} else {
+		ret = create_snapshot(root, vol_args->name, namelen);
+	}
+
+out:
+	kfree(vol_args);
+	return ret;
+}
+
+static int btrfs_ioctl_defrag(struct file *file)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFDIR:
+		btrfs_defrag_root(root, 0);
+		btrfs_defrag_root(root->fs_info->extent_root, 0);
+		break;
+	case S_IFREG:
+		btrfs_defrag_file(file);
+		break;
+	}
+
+	return 0;
+}
+
+long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
+{
+	struct btrfs_ioctl_vol_args *vol_args;
+	int ret;
+
+	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+
+	if (!vol_args)
+		return -ENOMEM;
+
+	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
+		ret = -EFAULT;
+		goto out;
+	}
+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+	ret = btrfs_init_new_device(root, vol_args->name);
+
+out:
+	kfree(vol_args);
+	return ret;
+}
+
+long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
+{
+	struct btrfs_ioctl_vol_args *vol_args;
+	int ret;
+
+	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+
+	if (!vol_args)
+		return -ENOMEM;
+
+	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
+		ret = -EFAULT;
+		goto out;
+	}
+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+	ret = btrfs_rm_device(root, vol_args->name);
+
+out:
+	kfree(vol_args);
+	return ret;
+}
+
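+/*
+ * clone a file by copying the extent items from the file behind src_fd
+ * into this (currently empty) inode and taking extra references on the
+ * underlying extents, so no file data is actually copied
+ */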
+long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct file *src_file;
+	struct inode *src;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	char *buf;
+	struct btrfs_key key;
+	u32 nritems;
+	int slot;
+	int ret;
+
+	src_file = fget(src_fd);
+	if (!src_file)
+		return -EBADF;
+	src = src_file->f_dentry->d_inode;
+
+	ret = -EISDIR;
+	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
+		goto out_fput;
+
+	ret = -EXDEV;
+	if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
+		goto out_fput;
+
+	ret = -ENOMEM;
+	buf = vmalloc(btrfs_level_size(root, 0));
+	if (!buf)
+		goto out_fput;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		vfree(buf);
+		goto out_fput;
+	}
+	path->reada = 2;
+
+	if (inode < src) {
+		mutex_lock(&inode->i_mutex);
+		mutex_lock(&src->i_mutex);
+	} else {
+		mutex_lock(&src->i_mutex);
+		mutex_lock(&inode->i_mutex);
+	}
+
+	ret = -ENOTEMPTY;
+	if (inode->i_size)
+		goto out_unlock;
+
+	/* do any pending delalloc/csum calc on src, one way or
+	   another, and lock file content */
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+		lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
+		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
+		if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+			break;
+		unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+		btrfs_wait_ordered_range(src, 0, (u64)-1);
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	key.objectid = src->i_ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	while (1) {
+		/*
+		 * note the key will change type as we walk through the
+		 * tree.
+		 */
+		ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0)
+				break;
+			nritems = btrfs_header_nritems(path->nodes[0]);
+		}
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
+		    key.objectid != src->i_ino)
+			break;
+
+		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY ||
+		    btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
+			u32 size;
+			struct btrfs_key new_key;
+
+			size = btrfs_item_size_nr(leaf, slot);
+			read_extent_buffer(leaf, buf,
+					   btrfs_item_ptr_offset(leaf, slot),
+					   size);
+			btrfs_release_path(root, path);
+
+			memcpy(&new_key, &key, sizeof(new_key));
+			new_key.objectid = inode->i_ino;
+			ret = btrfs_insert_empty_item(trans, root, path,
+						      &new_key, size);
+			if (ret)
+				goto out;
+
+			leaf = path->nodes[0];
+			slot = path->slots[0];
+			write_extent_buffer(leaf, buf,
+					    btrfs_item_ptr_offset(leaf, slot),
+					    size);
+			btrfs_mark_buffer_dirty(leaf);
+		}
+
+		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
+			struct btrfs_file_extent_item *extent;
+			int found_type;
+
+			extent = btrfs_item_ptr(leaf, slot,
+						struct btrfs_file_extent_item);
+			found_type = btrfs_file_extent_type(leaf, extent);
+			if (found_type == BTRFS_FILE_EXTENT_REG) {
+				u64 ds = btrfs_file_extent_disk_bytenr(leaf,
+								       extent);
+				u64 dl = btrfs_file_extent_disk_num_bytes(leaf,
+								 extent);
+				/* ds == 0 means there's a hole */
+				if (ds != 0) {
+					ret = btrfs_inc_extent_ref(trans, root,
+						     ds, dl, leaf->start,
+						     root->root_key.objectid,
+						     trans->transid,
+						     inode->i_ino);
+					BUG_ON(ret);
+				}
+			}
+		}
+		btrfs_release_path(root, path);
+		key.offset++;
+	}
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	if (ret == 0) {
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode_set_bytes(inode, inode_get_bytes(src));
+		btrfs_i_size_write(inode, src->i_size);
+		BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
+		ret = btrfs_update_inode(trans, root, inode);
+	}
+	btrfs_end_transaction(trans, root);
+	unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
+	if (ret)
+		vmtruncate(inode, 0);
+out_unlock:
+	mutex_unlock(&src->i_mutex);
+	mutex_unlock(&inode->i_mutex);
+	vfree(buf);
+	btrfs_free_path(path);
+out_fput:
+	fput(src_file);
+	return ret;
+}
+
+/*
+ * there are many ways the trans_start and trans_end ioctls can lead
+ * to deadlocks.  They should only be used by applications that
+ * basically own the machine, and have a very in-depth understanding
+ * of all the possible deadlocks and enospc problems.
+ */
+long btrfs_ioctl_trans_start(struct file *file)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (file->private_data) {
+		ret = -EINPROGRESS;
+		goto out;
+	}
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	root->fs_info->open_ioctl_trans++;
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	trans = btrfs_start_ioctl_transaction(root, 0);
+	if (trans)
+		file->private_data = trans;
+	else
+		ret = -ENOMEM;
+	/*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
+out:
+	return ret;
+}
+
+/*
+ * there are many ways the trans_start and trans_end ioctls can lead
+ * to deadlocks.  They should only be used by applications that
+ * basically own the machine, and have a very in-depth understanding
+ * of all the possible deadlocks and enospc problems.
+ */
+long btrfs_ioctl_trans_end(struct file *file)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+
+	trans = file->private_data;
+	if (!trans) {
+		ret = -EINVAL;
+		goto out;
+	}
+	btrfs_end_transaction(trans, root);
+	file->private_data = NULL;
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	root->fs_info->open_ioctl_trans--;
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+out:
+	return ret;
+}
+
+long btrfs_ioctl(struct file *file, unsigned int
+		cmd, unsigned long arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+
+	switch (cmd) {
+	case BTRFS_IOC_SNAP_CREATE:
+		return btrfs_ioctl_snap_create(file, (void __user *)arg);
+	case BTRFS_IOC_DEFRAG:
+		return btrfs_ioctl_defrag(file);
+	case BTRFS_IOC_RESIZE:
+		return btrfs_ioctl_resize(root, (void __user *)arg);
+	case BTRFS_IOC_ADD_DEV:
+		return btrfs_ioctl_add_dev(root, (void __user *)arg);
+	case BTRFS_IOC_RM_DEV:
+		return btrfs_ioctl_rm_dev(root, (void __user *)arg);
+	case BTRFS_IOC_BALANCE:
+		return btrfs_balance(root->fs_info->dev_root);
+	case BTRFS_IOC_CLONE:
+		return btrfs_ioctl_clone(file, arg);
+	case BTRFS_IOC_TRANS_START:
+		return btrfs_ioctl_trans_start(file);
+	case BTRFS_IOC_TRANS_END:
+		return btrfs_ioctl_trans_end(file);
+	case BTRFS_IOC_SYNC:
+		btrfs_start_delalloc_inodes(root);
+		btrfs_sync_fs(file->f_dentry->d_sb, 1);
+		return 0;
+	}
+
+	return -ENOTTY;
+}

+ 55 - 0
fs/btrfs/ioctl.h

@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __IOCTL_
+#define __IOCTL_
+#include <linux/ioctl.h>
+
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_VOL_NAME_MAX 255
+#define BTRFS_PATH_NAME_MAX 4095
+
+struct btrfs_ioctl_vol_args {
+	char name[BTRFS_PATH_NAME_MAX + 1];
+};
+
+#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
+				   struct btrfs_ioctl_vol_args)
+/* trans start and trans end are dangerous, and only for
+ * use by applications that know how to avoid the
+ * resulting deadlocks
+ */
+#define BTRFS_IOC_TRANS_START  _IO(BTRFS_IOCTL_MAGIC, 6)
+#define BTRFS_IOC_TRANS_END    _IO(BTRFS_IOCTL_MAGIC, 7)
+#define BTRFS_IOC_SYNC         _IO(BTRFS_IOCTL_MAGIC, 8)
+
+#define BTRFS_IOC_CLONE        _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
+				   struct btrfs_ioctl_vol_args)
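+
+/*
+ * Rough userspace usage sketch (not part of the kernel ABI itself):
+ * assuming 'fd' is an open descriptor somewhere inside a mounted btrfs
+ * filesystem, a snapshot named "snap1" could be requested with:
+ *
+ *	struct btrfs_ioctl_vol_args args;
+ *
+ *	memset(&args, 0, sizeof(args));
+ *	strncpy(args.name, "snap1", BTRFS_PATH_NAME_MAX);
+ *	ioctl(fd, BTRFS_IOC_SNAP_CREATE, &args);
+ */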
+
+#endif

+ 87 - 0
fs/btrfs/locking.c

@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/page-flags.h>
+#include <asm/bug.h>
+#include "ctree.h"
+#include "extent_io.h"
+#include "locking.h"
+
+/*
+ * locks the per buffer mutex in an extent buffer.  This uses adaptive locks
+ * and the spin is not tuned very extensively.  The spinning does make a big
+ * difference in almost every workload, but spinning for the right amount of
+ * time needs some help.
+ *
+ * In general, we want to spin as long as the lock holder is doing btree searches,
+ * and we should give up if they are in more expensive code.
+ */
+int btrfs_tree_lock(struct extent_buffer *eb)
+{
+	int i;
+
+	if (mutex_trylock(&eb->mutex))
+		return 0;
+	for (i = 0; i < 512; i++) {
+		cpu_relax();
+		if (mutex_trylock(&eb->mutex))
+			return 0;
+	}
+	cpu_relax();
+	mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
+	return 0;
+}
+
+int btrfs_try_tree_lock(struct extent_buffer *eb)
+{
+	return mutex_trylock(&eb->mutex);
+}
+
+int btrfs_tree_unlock(struct extent_buffer *eb)
+{
+	mutex_unlock(&eb->mutex);
+	return 0;
+}
+
+int btrfs_tree_locked(struct extent_buffer *eb)
+{
+	return mutex_is_locked(&eb->mutex);
+}
+
+/*
+ * btrfs_search_slot uses this to decide if it should drop its locks
+ * before doing something expensive like allocating free blocks for cow.
+ */
+int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
+{
+	int i;
+	struct extent_buffer *eb;
+	for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) {
+		eb = path->nodes[i];
+		if (!eb)
+			break;
+		smp_mb();
+		if (!list_empty(&eb->mutex.wait_list))
+			return 1;
+	}
+	return 0;
+}
+

+ 27 - 0
fs/btrfs/locking.h

@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_LOCKING_
+#define __BTRFS_LOCKING_
+
+int btrfs_tree_lock(struct extent_buffer *eb);
+int btrfs_tree_unlock(struct extent_buffer *eb);
+int btrfs_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_lock(struct extent_buffer *eb);
+int btrfs_path_lock_waiting(struct btrfs_path *path, int level);
+#endif

+ 727 - 0
fs/btrfs/ordered-data.c

@@ -0,0 +1,727 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "extent_io.h"
+
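+/*
+ * return the end of the range covered by an ordered extent (one past the
+ * last byte), clamped to (u64)-1 if file_offset + len overflows
+ */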
+static u64 entry_end(struct btrfs_ordered_extent *entry)
+{
+	if (entry->file_offset + entry->len < entry->file_offset)
+		return (u64)-1;
+	return entry->file_offset + entry->len;
+}
+
+/* returns NULL if the insertion worked, otherwise it returns the node
+ * already present in the tree
+ */
+static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
+				   struct rb_node *node)
+{
+	struct rb_node ** p = &root->rb_node;
+	struct rb_node * parent = NULL;
+	struct btrfs_ordered_extent *entry;
+
+	while(*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
+
+		if (file_offset < entry->file_offset)
+			p = &(*p)->rb_left;
+		else if (file_offset >= entry_end(entry))
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+/*
+ * look for a given offset in the tree, and if it can't be found return the
+ * first lesser offset
+ */
+static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
+				     struct rb_node **prev_ret)
+{
+	struct rb_node * n = root->rb_node;
+	struct rb_node *prev = NULL;
+	struct rb_node *test;
+	struct btrfs_ordered_extent *entry;
+	struct btrfs_ordered_extent *prev_entry = NULL;
+
+	while(n) {
+		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
+		prev = n;
+		prev_entry = entry;
+
+		if (file_offset < entry->file_offset)
+			n = n->rb_left;
+		else if (file_offset >= entry_end(entry))
+			n = n->rb_right;
+		else
+			return n;
+	}
+	if (!prev_ret)
+		return NULL;
+
+	while(prev && file_offset >= entry_end(prev_entry)) {
+		test = rb_next(prev);
+		if (!test)
+			break;
+		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
+				      rb_node);
+		if (file_offset < entry_end(prev_entry))
+			break;
+
+		prev = test;
+	}
+	if (prev)
+		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
+				      rb_node);
+	while(prev && file_offset < entry_end(prev_entry)) {
+		test = rb_prev(prev);
+		if (!test)
+			break;
+		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
+				      rb_node);
+		prev = test;
+	}
+	*prev_ret = prev;
+	return NULL;
+}
+
+/*
+ * helper to check if a given offset is inside a given entry
+ */
+static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
+{
+	if (file_offset < entry->file_offset ||
+	    entry->file_offset + entry->len <= file_offset)
+		return 0;
+	return 1;
+}
+
+/*
+ * find the first ordered struct that has this offset, otherwise
+ * the first one less than this offset
+ */
+static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
+					  u64 file_offset)
+{
+	struct rb_root *root = &tree->tree;
+	struct rb_node *prev;
+	struct rb_node *ret;
+	struct btrfs_ordered_extent *entry;
+
+	if (tree->last) {
+		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
+				 rb_node);
+		if (offset_in_entry(entry, file_offset))
+			return tree->last;
+	}
+	ret = __tree_search(root, file_offset, &prev);
+	if (!ret)
+		ret = prev;
+	if (ret)
+		tree->last = ret;
+	return ret;
+}
+
+/* allocate and add a new ordered_extent into the per-inode tree.
+ * file_offset is the logical offset in the file
+ *
+ * start is the disk block number of an extent already reserved in the
+ * extent allocation tree
+ *
+ * len is the length of the extent
+ *
+ * This also sets the EXTENT_ORDERED bit on the range in the inode.
+ *
+ * The tree is given a single reference on the ordered extent that was
+ * inserted.
+ */
+int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+			     u64 start, u64 len, int nocow)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *entry;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	entry = kzalloc(sizeof(*entry), GFP_NOFS);
+	if (!entry)
+		return -ENOMEM;
+
+	mutex_lock(&tree->mutex);
+	entry->file_offset = file_offset;
+	entry->start = start;
+	entry->len = len;
+	entry->inode = inode;
+	if (nocow)
+		set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);
+
+	/* one ref for the tree */
+	atomic_set(&entry->refs, 1);
+	init_waitqueue_head(&entry->wait);
+	INIT_LIST_HEAD(&entry->list);
+	INIT_LIST_HEAD(&entry->root_extent_list);
+
+	node = tree_insert(&tree->tree, file_offset,
+			   &entry->rb_node);
+	if (node) {
+		printk("warning dup entry from add_ordered_extent\n");
+		BUG();
+	}
+	set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
+			   entry_end(entry) - 1, GFP_NOFS);
+
+	spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+	list_add_tail(&entry->root_extent_list,
+		      &BTRFS_I(inode)->root->fs_info->ordered_extents);
+	spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+
+	mutex_unlock(&tree->mutex);
+	BUG_ON(node);
+	return 0;
+}
+
+/*
+ * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
+ * when an ordered extent is finished.  If the list covers more than one
+ * ordered extent, it is split across multiples.
+ */
+int btrfs_add_ordered_sum(struct inode *inode,
+			  struct btrfs_ordered_extent *entry,
+			  struct btrfs_ordered_sum *sum)
+{
+	struct btrfs_ordered_inode_tree *tree;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	mutex_lock(&tree->mutex);
+	list_add_tail(&sum->list, &entry->list);
+	mutex_unlock(&tree->mutex);
+	return 0;
+}
+
+/*
+ * this is used to account for finished IO across a given range
+ * of the file.  The IO should not span ordered extents.  If
+ * a given ordered_extent is completely done, 1 is returned, otherwise
+ * 0.
+ *
+ * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
+ * to make sure this function only returns 1 once for a given ordered extent.
+ */
+int btrfs_dec_test_ordered_pending(struct inode *inode,
+				   u64 file_offset, u64 io_size)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *entry;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	int ret;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	mutex_lock(&tree->mutex);
+	clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
+			     GFP_NOFS);
+	node = tree_search(tree, file_offset);
+	if (!node) {
+		ret = 1;
+		goto out;
+	}
+
+	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+	if (!offset_in_entry(entry, file_offset)) {
+		ret = 1;
+		goto out;
+	}
+
+	ret = test_range_bit(io_tree, entry->file_offset,
+			     entry->file_offset + entry->len - 1,
+			     EXTENT_ORDERED, 0);
+	if (ret == 0)
+		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+out:
+	mutex_unlock(&tree->mutex);
+	return ret == 0;
+}
+
+/*
+ * used to drop a reference on an ordered extent.  This will free
+ * the extent if the last reference is dropped
+ */
+int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
+{
+	struct list_head *cur;
+	struct btrfs_ordered_sum *sum;
+
+	if (atomic_dec_and_test(&entry->refs)) {
+		while(!list_empty(&entry->list)) {
+			cur = entry->list.next;
+			sum = list_entry(cur, struct btrfs_ordered_sum, list);
+			list_del(&sum->list);
+			kfree(sum);
+		}
+		kfree(entry);
+	}
+	return 0;
+}
+
+/*
+ * remove an ordered extent from the tree.  No references are dropped
+ * but, anyone waiting on this extent is woken up.
+ */
+int btrfs_remove_ordered_extent(struct inode *inode,
+				struct btrfs_ordered_extent *entry)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	mutex_lock(&tree->mutex);
+	node = &entry->rb_node;
+	rb_erase(node, &tree->tree);
+	tree->last = NULL;
+	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+
+	spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+	list_del_init(&entry->root_extent_list);
+	spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+
+	mutex_unlock(&tree->mutex);
+	wake_up(&entry->wait);
+	return 0;
+}
+
+/*
+ * wait for all the ordered extents in a root.  This is done when balancing
+ * space between drives.
+ */
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
+{
+	struct list_head splice;
+	struct list_head *cur;
+	struct btrfs_ordered_extent *ordered;
+	struct inode *inode;
+
+	INIT_LIST_HEAD(&splice);
+
+	spin_lock(&root->fs_info->ordered_extent_lock);
+	list_splice_init(&root->fs_info->ordered_extents, &splice);
+	while (!list_empty(&splice)) {
+		cur = splice.next;
+		ordered = list_entry(cur, struct btrfs_ordered_extent,
+				     root_extent_list);
+		if (nocow_only &&
+		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+			list_move(&ordered->root_extent_list,
+				  &root->fs_info->ordered_extents);
+			cond_resched_lock(&root->fs_info->ordered_extent_lock);
+			continue;
+		}
+
+		list_del_init(&ordered->root_extent_list);
+		atomic_inc(&ordered->refs);
+
+		/*
+		 * the inode may be getting freed (in sys_unlink path).
+		 */
+		inode = igrab(ordered->inode);
+
+		spin_unlock(&root->fs_info->ordered_extent_lock);
+
+		if (inode) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			iput(inode);
+		} else {
+			btrfs_put_ordered_extent(ordered);
+		}
+
+		spin_lock(&root->fs_info->ordered_extent_lock);
+	}
+	spin_unlock(&root->fs_info->ordered_extent_lock);
+	return 0;
+}
+
+/*
+ * Used to start IO or wait for a given ordered extent to finish.
+ *
+ * If wait is one, this effectively waits on page writeback for all the pages
+ * in the extent, and it waits on the io completion code to insert
+ * metadata into the btree corresponding to the extent
+ */
+void btrfs_start_ordered_extent(struct inode *inode,
+				       struct btrfs_ordered_extent *entry,
+				       int wait)
+{
+	u64 start = entry->file_offset;
+	u64 end = start + entry->len - 1;
+
+	/*
+	 * pages in the range can be dirty, clean or writeback.  We
+	 * start IO on any dirty ones so the wait doesn't stall waiting
+	 * for pdflush to find them
+	 */
+	btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE);
+	if (wait)
+		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
+						 &entry->flags));
+}
+
+/*
+ * Used to wait on ordered extents across a large range of bytes.
+ */
+int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+{
+	u64 end;
+	u64 orig_end;
+	u64 wait_end;
+	struct btrfs_ordered_extent *ordered;
+
+	if (start + len < start) {
+		orig_end = INT_LIMIT(loff_t);
+	} else {
+		orig_end = start + len - 1;
+		if (orig_end > INT_LIMIT(loff_t))
+			orig_end = INT_LIMIT(loff_t);
+	}
+	wait_end = orig_end;
+again:
+	/* start IO across the range first to instantiate any delalloc
+	 * extents
+	 */
+	btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE);
+
+	btrfs_wait_on_page_writeback_range(inode->i_mapping,
+					   start >> PAGE_CACHE_SHIFT,
+					   orig_end >> PAGE_CACHE_SHIFT);
+
+	end = orig_end;
+	while(1) {
+		ordered = btrfs_lookup_first_ordered_extent(inode, end);
+		if (!ordered) {
+			break;
+		}
+		if (ordered->file_offset > orig_end) {
+			btrfs_put_ordered_extent(ordered);
+			break;
+		}
+		if (ordered->file_offset + ordered->len < start) {
+			btrfs_put_ordered_extent(ordered);
+			break;
+		}
+		btrfs_start_ordered_extent(inode, ordered, 1);
+		end = ordered->file_offset;
+		btrfs_put_ordered_extent(ordered);
+		if (end == 0 || end == start)
+			break;
+		end--;
+	}
+	if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+			   EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+		printk("inode %lu still ordered or delalloc after wait "
+		       "%llu %llu\n", inode->i_ino,
+		       (unsigned long long)start,
+		       (unsigned long long)orig_end);
+		goto again;
+	}
+	return 0;
+}
+
+/*
+ * find an ordered extent corresponding to file_offset.  return NULL if
+ * nothing is found, otherwise take a reference on the extent and return it
+ */
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+							 u64 file_offset)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *entry = NULL;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	mutex_lock(&tree->mutex);
+	node = tree_search(tree, file_offset);
+	if (!node)
+		goto out;
+
+	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+	if (!offset_in_entry(entry, file_offset))
+		entry = NULL;
+	if (entry)
+		atomic_inc(&entry->refs);
+out:
+	mutex_unlock(&tree->mutex);
+	return entry;
+}
+
+/*
+ * lookup and return any extent before 'file_offset'.  NULL is returned
+ * if none is found
+ */
+struct btrfs_ordered_extent *
+btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *entry = NULL;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	mutex_lock(&tree->mutex);
+	node = tree_search(tree, file_offset);
+	if (!node)
+		goto out;
+
+	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+	atomic_inc(&entry->refs);
+out:
+	mutex_unlock(&tree->mutex);
+	return entry;
+}
+
+/*
+ * After an extent is done, call this to conditionally update the on disk
+ * i_size.  i_size is updated to cover any fully written part of the file.
+ */
+int btrfs_ordered_update_i_size(struct inode *inode,
+				struct btrfs_ordered_extent *ordered)
+{
+	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	u64 disk_i_size;
+	u64 new_i_size;
+	u64 i_size_test;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *test;
+
+	mutex_lock(&tree->mutex);
+	disk_i_size = BTRFS_I(inode)->disk_i_size;
+
+	/*
+	 * if the disk i_size is already at the inode->i_size, or
+	 * this ordered extent is inside the disk i_size, we're done
+	 */
+	if (disk_i_size >= inode->i_size ||
+	    ordered->file_offset + ordered->len <= disk_i_size) {
+		goto out;
+	}
+
+	/*
+	 * we can't update the disk_i_size if there are delalloc bytes
+	 * between disk_i_size and this ordered extent
+	 */
+	if (test_range_bit(io_tree, disk_i_size,
+			   ordered->file_offset + ordered->len - 1,
+			   EXTENT_DELALLOC, 0)) {
+		goto out;
+	}
+	/*
+	 * walk backward from this ordered extent to disk_i_size.
+	 * if we find an ordered extent then we can't update disk i_size
+	 * yet
+	 */
+	node = &ordered->rb_node;
+	while(1) {
+		node = rb_prev(node);
+		if (!node)
+			break;
+		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+		if (test->file_offset + test->len <= disk_i_size)
+			break;
+		if (test->file_offset >= inode->i_size)
+			break;
+		if (test->file_offset >= disk_i_size)
+			goto out;
+	}
+	new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
+
+	/*
+	 * at this point, we know we can safely update i_size to at least
+	 * the offset from this ordered extent.  But, we need to
+	 * walk forward and see if ios from higher up in the file have
+	 * finished.
+	 */
+	node = rb_next(&ordered->rb_node);
+	i_size_test = 0;
+	if (node) {
+		/*
+		 * do we have an area where IO might have finished
+		 * between our ordered extent and the next one.
+		 */
+		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+		if (test->file_offset > entry_end(ordered)) {
+			i_size_test = test->file_offset;
+		}
+	} else {
+		i_size_test = i_size_read(inode);
+	}
+
+	/*
+	 * i_size_test is the end of a region after this ordered
+	 * extent where there are no ordered extents.  As long as there
+	 * are no delalloc bytes in this area, it is safe to update
+	 * disk_i_size to the end of the region.
+	 */
+	if (i_size_test > entry_end(ordered) &&
+	    !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
+			   EXTENT_DELALLOC, 0)) {
+		new_i_size = min_t(u64, i_size_test, i_size_read(inode));
+	}
+	BTRFS_I(inode)->disk_i_size = new_i_size;
+out:
+	mutex_unlock(&tree->mutex);
+	return 0;
+}
+
+/*
+ * search the ordered extents for one corresponding to 'offset' and
+ * try to find a checksum.  This is used because we allow pages to
+ * be reclaimed before their checksum is actually put into the btree
+ */
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
+{
+	struct btrfs_ordered_sum *ordered_sum;
+	struct btrfs_sector_sum *sector_sums;
+	struct btrfs_ordered_extent *ordered;
+	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
+	struct list_head *cur;
+	unsigned long num_sectors;
+	unsigned long i;
+	u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
+	int ret = 1;
+
+	ordered = btrfs_lookup_ordered_extent(inode, offset);
+	if (!ordered)
+		return 1;
+
+	mutex_lock(&tree->mutex);
+	list_for_each_prev(cur, &ordered->list) {
+		ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
+		if (offset >= ordered_sum->file_offset) {
+			num_sectors = ordered_sum->len / sectorsize;
+			sector_sums = ordered_sum->sums;
+			for (i = 0; i < num_sectors; i++) {
+				if (sector_sums[i].offset == offset) {
+					*sum = sector_sums[i].sum;
+					ret = 0;
+					goto out;
+				}
+			}
+		}
+	}
+out:
+	mutex_unlock(&tree->mutex);
+	btrfs_put_ordered_extent(ordered);
+	return ret;
+}
+
+
+/**
+ * taken from mm/filemap.c because it isn't exported
+ *
+ * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
+ * @mapping:	address space structure to write
+ * @start:	offset in bytes where the range starts
+ * @end:	offset in bytes where the range ends (inclusive)
+ * @sync_mode:	enable synchronous operation
+ *
+ * Start writeback against all of a mapping's dirty pages that lie
+ * within the byte offsets <start, end> inclusive.
+ *
+ * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
+ * opposed to a regular memory cleansing writeback.  The difference between
+ * these two operations is that if a dirty page/buffer is encountered, it must
+ * be waited upon, and not just skipped over.
+ */
+int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
+			   loff_t end, int sync_mode)
+{
+	struct writeback_control wbc = {
+		.sync_mode = sync_mode,
+		.nr_to_write = mapping->nrpages * 2,
+		.range_start = start,
+		.range_end = end,
+		.for_writepages = 1,
+	};
+	return btrfs_writepages(mapping, &wbc);
+}
+
+/**
+ * taken from mm/filemap.c because it isn't exported
+ *
+ * wait_on_page_writeback_range - wait for writeback to complete
+ * @mapping:	target address_space
+ * @start:	beginning page index
+ * @end:	ending page index
+ *
+ * Wait for writeback to complete against pages indexed by start->end
+ * inclusive
+ */
+int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
+				       pgoff_t start, pgoff_t end)
+{
+	struct pagevec pvec;
+	int nr_pages;
+	int ret = 0;
+	pgoff_t index;
+
+	if (end < start)
+		return 0;
+
+	pagevec_init(&pvec, 0);
+	index = start;
+	while ((index <= end) &&
+			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			PAGECACHE_TAG_WRITEBACK,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
+		unsigned i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/* until radix tree lookup accepts end_index */
+			if (page->index > end)
+				continue;
+
+			wait_on_page_writeback(page);
+			if (PageError(page))
+				ret = -EIO;
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	/* Check for outstanding write errors */
+	if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+		ret = -ENOSPC;
+	if (test_and_clear_bit(AS_EIO, &mapping->flags))
+		ret = -EIO;
+
+	return ret;
+}

+ 149 - 0
fs/btrfs/ordered-data.h

@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_ORDERED_DATA__
+#define __BTRFS_ORDERED_DATA__
+
+/* one of these per inode */
+struct btrfs_ordered_inode_tree {
+	struct mutex mutex;
+	struct rb_root tree;
+	struct rb_node *last;
+};
+
+/*
+ * these are used to collect checksums done just before bios submission.
+ * They are attached via a list into the ordered extent, and
+ * checksum items are inserted into the tree after all the blocks in
+ * the ordered extent are on disk
+ */
+struct btrfs_sector_sum {
+	u64 offset;
+	u32 sum;
+};
+
+struct btrfs_ordered_sum {
+	u64 file_offset;
+	/*
+	 * this is the length in bytes covered by the sums array below.
+	 * But, the sums array may not be contiguous in the file.
+	 */
+	unsigned long len;
+	struct list_head list;
+	/* last field is a variable length array of btrfs_sector_sums */
+	struct btrfs_sector_sum sums[];
+};
+
+/*
+ * bits for the flags field:
+ *
+ * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
+ * It is used to make sure metadata is inserted into the tree only once
+ * per extent.
+ *
+ * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
+ * rbtree, just before waking any waiters.  It is used to indicate the
+ * IO is done and any metadata is inserted into the tree.
+ */
+#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
+
+#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
+
+#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
+
+struct btrfs_ordered_extent {
+	/* logical offset in the file */
+	u64 file_offset;
+
+	/* disk byte number */
+	u64 start;
+
+	/* length of the extent in bytes */
+	u64 len;
+
+	/* flags (described above) */
+	unsigned long flags;
+
+	/* reference count */
+	atomic_t refs;
+
+	/* the inode we belong to */
+	struct inode *inode;
+
+	/* list of checksums for insertion when the extent io is done */
+	struct list_head list;
+
+	/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
+	wait_queue_head_t wait;
+
+	/* our friendly rbtree entry */
+	struct rb_node rb_node;
+
+	/* a per root list of all the pending ordered extents */
+	struct list_head root_extent_list;
+};
+
+
+/*
+ * calculates the total size you need to allocate for an ordered sum
+ * structure spanning 'bytes' in the file
+ */
+static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
+					 unsigned long bytes)
+{
+	unsigned long num_sectors = (bytes + root->sectorsize - 1) /
+		root->sectorsize;
+	num_sectors++;
+	return sizeof(struct btrfs_ordered_sum) +
+		num_sectors * sizeof(struct btrfs_sector_sum);
+}
+
+static inline void
+btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
+{
+	mutex_init(&t->mutex);
+	t->tree.rb_node = NULL;
+	t->last = NULL;
+}
+
+int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
+int btrfs_remove_ordered_extent(struct inode *inode,
+				struct btrfs_ordered_extent *entry);
+int btrfs_dec_test_ordered_pending(struct inode *inode,
+				       u64 file_offset, u64 io_size);
+int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+			     u64 start, u64 len, int nocow);
+int btrfs_add_ordered_sum(struct inode *inode,
+			  struct btrfs_ordered_extent *entry,
+			  struct btrfs_ordered_sum *sum);
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+							 u64 file_offset);
+void btrfs_start_ordered_extent(struct inode *inode,
+				struct btrfs_ordered_extent *entry, int wait);
+int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+struct btrfs_ordered_extent *
+btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
+int btrfs_ordered_update_i_size(struct inode *inode,
+				struct btrfs_ordered_extent *ordered);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
+int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
+				       pgoff_t start, pgoff_t end);
+int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
+			   loff_t end, int sync_mode);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
+#endif

+ 67 - 0
fs/btrfs/orphan.c

@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+
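+/*
+ * orphan items are keyed by BTRFS_ORPHAN_OBJECTID with the interesting
+ * objectid stored in the key offset.  They record objects (typically
+ * inodes) that may still need cleanup if we crash before the last
+ * reference to them goes away.
+ */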
+int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 offset)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
+	key.offset = offset;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, u64 offset)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	key.objectid = BTRFS_ORPHAN_OBJECTID;
+	btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
+	key.offset = offset;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret)
+		goto out;
+
+	ret = btrfs_del_item(trans, root, path);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}

+ 200 - 0
fs/btrfs/print-tree.c

@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
+{
+	int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
+	int i;
+	printk("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n",
+	       (unsigned long long)btrfs_chunk_length(eb, chunk),
+	       (unsigned long long)btrfs_chunk_owner(eb, chunk),
+	       (unsigned long long)btrfs_chunk_type(eb, chunk),
+	       num_stripes);
+	for (i = 0 ; i < num_stripes ; i++) {
+		printk("\t\t\tstripe %d devid %llu offset %llu\n", i,
+		      (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i),
+		      (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i));
+	}
+}
+static void print_dev_item(struct extent_buffer *eb,
+			   struct btrfs_dev_item *dev_item)
+{
+	printk("\t\tdev item devid %llu "
+	       "total_bytes %llu bytes used %llu\n",
+	       (unsigned long long)btrfs_device_id(eb, dev_item),
+	       (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
+	       (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
+}
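+
+/* dump every item in a leaf to the kernel log, for debugging */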
+void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
+{
+	int i;
+	u32 nr = btrfs_header_nritems(l);
+	struct btrfs_item *item;
+	struct btrfs_extent_item *ei;
+	struct btrfs_root_item *ri;
+	struct btrfs_dir_item *di;
+	struct btrfs_inode_item *ii;
+	struct btrfs_block_group_item *bi;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_extent_ref *ref;
+	struct btrfs_dev_extent *dev_extent;
+	u32 type;
+
+	printk("leaf %llu total ptrs %d free space %d\n",
+		(unsigned long long)btrfs_header_bytenr(l), nr,
+		btrfs_leaf_free_space(root, l));
+	for (i = 0 ; i < nr ; i++) {
+		item = btrfs_item_nr(l, i);
+		btrfs_item_key_to_cpu(l, &key, i);
+		type = btrfs_key_type(&key);
+		printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n",
+			i,
+			(unsigned long long)key.objectid, type,
+			(unsigned long long)key.offset,
+			btrfs_item_offset(l, item), btrfs_item_size(l, item));
+		switch (type) {
+		case BTRFS_INODE_ITEM_KEY:
+			ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
+			printk("\t\tinode generation %llu size %llu mode %o\n",
+		              (unsigned long long)btrfs_inode_generation(l, ii),
+			      (unsigned long long)btrfs_inode_size(l, ii),
+			       btrfs_inode_mode(l, ii));
+			break;
+		case BTRFS_DIR_ITEM_KEY:
+			di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
+			btrfs_dir_item_key_to_cpu(l, di, &found_key);
+			printk("\t\tdir oid %llu type %u\n",
+				(unsigned long long)found_key.objectid,
+				btrfs_dir_type(l, di));
+			break;
+		case BTRFS_ROOT_ITEM_KEY:
+			ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
+			printk("\t\troot data bytenr %llu refs %u\n",
+				(unsigned long long)btrfs_disk_root_bytenr(l, ri),
+				btrfs_disk_root_refs(l, ri));
+			break;
+		case BTRFS_EXTENT_ITEM_KEY:
+			ei = btrfs_item_ptr(l, i, struct btrfs_extent_item);
+			printk("\t\textent data refs %u\n",
+				btrfs_extent_refs(l, ei));
+			break;
+		case BTRFS_EXTENT_REF_KEY:
+			ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref);
+			printk("\t\textent back ref root %llu gen %llu "
+			       "owner %llu num_refs %lu\n",
+			       (unsigned long long)btrfs_ref_root(l, ref),
+			       (unsigned long long)btrfs_ref_generation(l, ref),
+			       (unsigned long long)btrfs_ref_objectid(l, ref),
+			       (unsigned long)btrfs_ref_num_refs(l, ref));
+			break;
+
+		case BTRFS_EXTENT_DATA_KEY:
+			fi = btrfs_item_ptr(l, i,
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(l, fi) ==
+			    BTRFS_FILE_EXTENT_INLINE) {
+				printk("\t\tinline extent data size %u\n",
+			           btrfs_file_extent_inline_len(l, item));
+				break;
+			}
+			printk("\t\textent data disk bytenr %llu nr %llu\n",
+			       (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi),
+			       (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi));
+			printk("\t\textent data offset %llu nr %llu\n",
+			  (unsigned long long)btrfs_file_extent_offset(l, fi),
+			  (unsigned long long)btrfs_file_extent_num_bytes(l, fi));
+			break;
+		case BTRFS_BLOCK_GROUP_ITEM_KEY:
+			bi = btrfs_item_ptr(l, i,
+					    struct btrfs_block_group_item);
+			printk("\t\tblock group used %llu\n",
+			       (unsigned long long)btrfs_disk_block_group_used(l, bi));
+			break;
+		case BTRFS_CHUNK_ITEM_KEY:
+			print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk));
+			break;
+		case BTRFS_DEV_ITEM_KEY:
+			print_dev_item(l, btrfs_item_ptr(l, i,
+					struct btrfs_dev_item));
+			break;
+		case BTRFS_DEV_EXTENT_KEY:
+			dev_extent = btrfs_item_ptr(l, i,
+						    struct btrfs_dev_extent);
+			printk("\t\tdev extent chunk_tree %llu\n"
+			       "\t\tchunk objectid %llu chunk offset %llu "
+			       "length %llu\n",
+			       (unsigned long long)
+			       btrfs_dev_extent_chunk_tree(l, dev_extent),
+			       (unsigned long long)
+			       btrfs_dev_extent_chunk_objectid(l, dev_extent),
+			       (unsigned long long)
+			       btrfs_dev_extent_chunk_offset(l, dev_extent),
+			       (unsigned long long)
+			       btrfs_dev_extent_length(l, dev_extent));
+		}
+	}
+}
+
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
+{
+	int i; u32 nr;
+	struct btrfs_key key;
+	int level;
+
+	if (!c)
+		return;
+	nr = btrfs_header_nritems(c);
+	level = btrfs_header_level(c);
+	if (level == 0) {
+		btrfs_print_leaf(root, c);
+		return;
+	}
+	printk("node %llu level %d total ptrs %d free spc %u\n",
+	       (unsigned long long)btrfs_header_bytenr(c),
+	       btrfs_header_level(c), nr,
+	       (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
+	for (i = 0; i < nr; i++) {
+		btrfs_node_key_to_cpu(c, &key, i);
+		printk("\tkey %d (%llu %u %llu) block %llu\n",
+		       i,
+		       (unsigned long long)key.objectid,
+		       key.type,
+		       (unsigned long long)key.offset,
+		       (unsigned long long)btrfs_node_blockptr(c, i));
+	}
+	for (i = 0; i < nr; i++) {
+		struct extent_buffer *next = read_tree_block(root,
+					btrfs_node_blockptr(c, i),
+					btrfs_level_size(root, level - 1),
+					btrfs_node_ptr_generation(c, i));
+		if (btrfs_is_leaf(next) &&
+		    btrfs_header_level(c) != 1)
+			BUG();
+		if (btrfs_header_level(next) !=
+			btrfs_header_level(c) - 1)
+			BUG();
+		btrfs_print_tree(root, next);
+		free_extent_buffer(next);
+	}
+}

+ 23 - 0
fs/btrfs/print-tree.h

@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __PRINT_TREE_
+#define __PRINT_TREE_
+void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t);
+#endif

+ 230 - 0
fs/btrfs/ref-cache.c

@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "ref-cache.h"
+#include "transaction.h"
+
+/*
+ * leaf refs are used to cache the information about which extents
+ * a given leaf has references on.  This allows us to process that leaf
+ * in btrfs_drop_snapshot without needing to read it back from disk.
+ */
+
+/*
+ * kmalloc a leaf reference struct and update the counters for the
+ * total ref cache size
+ */
+struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
+					    int nr_extents)
+{
+	struct btrfs_leaf_ref *ref;
+	size_t size = btrfs_leaf_ref_size(nr_extents);
+
+	ref = kmalloc(size, GFP_NOFS);
+	if (ref) {
+		spin_lock(&root->fs_info->ref_cache_lock);
+		root->fs_info->total_ref_cache_size += size;
+		spin_unlock(&root->fs_info->ref_cache_lock);
+
+		memset(ref, 0, sizeof(*ref));
+		atomic_set(&ref->usage, 1);
+		INIT_LIST_HEAD(&ref->list);
+	}
+	return ref;
+}
+
+/*
+ * free a leaf reference struct and update the counters for the
+ * total ref cache size
+ */
+void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
+{
+	if (!ref)
+		return;
+	WARN_ON(atomic_read(&ref->usage) == 0);
+	if (atomic_dec_and_test(&ref->usage)) {
+		size_t size = btrfs_leaf_ref_size(ref->nritems);
+
+		BUG_ON(ref->in_tree);
+		kfree(ref);
+
+		spin_lock(&root->fs_info->ref_cache_lock);
+		root->fs_info->total_ref_cache_size -= size;
+		spin_unlock(&root->fs_info->ref_cache_lock);
+	}
+}
+
+static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
+				   struct rb_node *node)
+{
+	struct rb_node ** p = &root->rb_node;
+	struct rb_node * parent = NULL;
+	struct btrfs_leaf_ref *entry;
+
+	while(*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
+
+		if (bytenr < entry->bytenr)
+			p = &(*p)->rb_left;
+		else if (bytenr > entry->bytenr)
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
+{
+	struct rb_node * n = root->rb_node;
+	struct btrfs_leaf_ref *entry;
+
+	while(n) {
+		entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
+		WARN_ON(!entry->in_tree);
+
+		if (bytenr < entry->bytenr)
+			n = n->rb_left;
+		else if (bytenr > entry->bytenr)
+			n = n->rb_right;
+		else
+			return n;
+	}
+	return NULL;
+}
+
+int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
+			   int shared)
+{
+	struct btrfs_leaf_ref *ref = NULL;
+	struct btrfs_leaf_ref_tree *tree = root->ref_tree;
+
+	if (shared)
+		tree = &root->fs_info->shared_ref_tree;
+	if (!tree)
+		return 0;
+
+	spin_lock(&tree->lock);
+	while(!list_empty(&tree->list)) {
+		ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list);
+		BUG_ON(ref->tree != tree);
+		if (ref->root_gen > max_root_gen)
+			break;
+		if (!xchg(&ref->in_tree, 0)) {
+			cond_resched_lock(&tree->lock);
+			continue;
+		}
+
+		rb_erase(&ref->rb_node, &tree->root);
+		list_del_init(&ref->list);
+
+		spin_unlock(&tree->lock);
+		btrfs_free_leaf_ref(root, ref);
+		cond_resched();
+		spin_lock(&tree->lock);
+	}
+	spin_unlock(&tree->lock);
+	return 0;
+}
+
+/*
+ * find the leaf ref for a given extent.  This returns the ref struct with
+ * a usage reference incremented
+ */
+struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
+					     u64 bytenr)
+{
+	struct rb_node *rb;
+	struct btrfs_leaf_ref *ref = NULL;
+	struct btrfs_leaf_ref_tree *tree = root->ref_tree;
+again:
+	if (tree) {
+		spin_lock(&tree->lock);
+		rb = tree_search(&tree->root, bytenr);
+		if (rb)
+			ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node);
+		if (ref)
+			atomic_inc(&ref->usage);
+		spin_unlock(&tree->lock);
+		if (ref)
+			return ref;
+	}
+	if (tree != &root->fs_info->shared_ref_tree) {
+		tree = &root->fs_info->shared_ref_tree;
+		goto again;
+	}
+	return NULL;
+}
+
+/*
+ * add a fully filled in leaf ref struct to the leaf ref rb tree.  The
+ * tree holds its own usage reference on the ref until it is removed.
+ */
+int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
+		       int shared)
+{
+	int ret = 0;
+	struct rb_node *rb;
+	struct btrfs_leaf_ref_tree *tree = root->ref_tree;
+
+	if (shared)
+		tree = &root->fs_info->shared_ref_tree;
+
+	spin_lock(&tree->lock);
+	rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node);
+	if (rb) {
+		ret = -EEXIST;
+	} else {
+		atomic_inc(&ref->usage);
+		ref->tree = tree;
+		ref->in_tree = 1;
+		list_add_tail(&ref->list, &tree->list);
+	}
+	spin_unlock(&tree->lock);
+	return ret;
+}
+
+/*
+ * remove a single leaf ref from the tree.  This drops the ref held by the tree
+ * only
+ */
+int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref)
+{
+	struct btrfs_leaf_ref_tree *tree;
+
+	if (!xchg(&ref->in_tree, 0))
+		return 0;
+
+	tree = ref->tree;
+	spin_lock(&tree->lock);
+
+	rb_erase(&ref->rb_node, &tree->root);
+	list_del_init(&ref->list);
+
+	spin_unlock(&tree->lock);
+
+	btrfs_free_leaf_ref(root, ref);
+	return 0;
+}
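
Together these routines form a small reference-counted cache keyed by leaf bytenr: the allocation holds one usage reference, the rb tree takes another in btrfs_add_leaf_ref(), and lookups return with the count already incremented. A hedged sketch of the intended lifecycle; leaf_bytenr, nr_extents and root_gen are placeholder variables the caller would supply:

	struct btrfs_leaf_ref *ref;

	ref = btrfs_alloc_leaf_ref(root, nr_extents);	/* usage == 1 */
	if (ref) {
		ref->bytenr = leaf_bytenr;
		ref->root_gen = root_gen;
		ref->nritems = nr_extents;
		/* fill ref->extents[0 .. nr_extents - 1] here */
		btrfs_add_leaf_ref(root, ref, 0);	/* tree takes its own ref */
		btrfs_free_leaf_ref(root, ref);		/* drop the allocation ref */
	}

	/* later: returns with usage incremented, or NULL if not cached */
	ref = btrfs_lookup_leaf_ref(root, leaf_bytenr);
	if (ref) {
		/* ... use ref->extents ... */
		btrfs_free_leaf_ref(root, ref);
	}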

+ 77 - 0
fs/btrfs/ref-cache.h

@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#ifndef __REFCACHE__
+#define __REFCACHE__
+
+struct btrfs_extent_info {
+	/* bytenr and num_bytes find the extent in the extent allocation tree */
+	u64 bytenr;
+	u64 num_bytes;
+
+	/* objectid and offset find the back reference for the file */
+	u64 objectid;
+	u64 offset;
+};
+
+struct btrfs_leaf_ref {
+	struct rb_node rb_node;
+	struct btrfs_leaf_ref_tree *tree;
+	int in_tree;
+	atomic_t usage;
+
+	u64 root_gen;
+	u64 bytenr;
+	u64 owner;
+	u64 generation;
+	int nritems;
+
+	struct list_head list;
+	struct btrfs_extent_info extents[];
+};
+
+static inline size_t btrfs_leaf_ref_size(int nr_extents)
+{
+	return sizeof(struct btrfs_leaf_ref) +
+	       sizeof(struct btrfs_extent_info) * nr_extents;
+}
+
+static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree)
+{
+	tree->root.rb_node = NULL;
+	INIT_LIST_HEAD(&tree->list);
+	spin_lock_init(&tree->lock);
+}
+
+static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree)
+{
+	return RB_EMPTY_ROOT(&tree->root);
+}
+
+struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root,
+					    int nr_extents);
+void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
+struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root,
+					     u64 bytenr);
+int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref,
+		       int shared);
+int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen,
+			   int shared);
+int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref);
+
+#endif
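
struct btrfs_leaf_ref ends in a flexible array member, so a single allocation covers both the header and the per-extent entries; btrfs_leaf_ref_size() computes the size to hand to kmalloc(). A small illustration (nr is a placeholder count):

	int nr = 3;
	struct btrfs_leaf_ref *ref;

	/* one allocation: header plus nr trailing btrfs_extent_info slots */
	ref = kmalloc(btrfs_leaf_ref_size(nr), GFP_NOFS);
	if (ref) {
		ref->nritems = nr;
		ref->extents[nr - 1].bytenr = 0;	/* last valid slot */
	}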

+ 277 - 0
fs/btrfs/root-tree.c

@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+/*
+ * search forward for a root, starting with objectid 'search_start'.
+ * If a root key is found, the objectid we find is filled into
+ * 'found_objectid' and 0 is returned.  < 0 is returned on error,
+ * 1 if there is nothing left in the tree.
+ */
+int btrfs_search_root(struct btrfs_root *root, u64 search_start,
+		      u64 *found_objectid)
+{
+	struct btrfs_path *path;
+	struct btrfs_key search_key;
+	int ret;
+
+	root = root->fs_info->tree_root;
+	search_key.objectid = search_start;
+	search_key.type = (u8)-1;
+	search_key.offset = (u64)-1;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+again:
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret == 0) {
+		ret = 1;
+		goto out;
+	}
+	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+		ret = btrfs_next_leaf(root, path);
+		if (ret)
+			goto out;
+	}
+	btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]);
+	if (search_key.type != BTRFS_ROOT_ITEM_KEY) {
+		search_key.offset++;
+		btrfs_release_path(root, path);
+		goto again;
+	}
+	ret = 0;
+	*found_objectid = search_key.objectid;
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * lookup the root with the highest offset for a given objectid.  The key we do
+ * find is copied into 'key'.  If we find something return 0, otherwise 1, < 0
+ * on error.
+ */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+			struct btrfs_root_item *item, struct btrfs_key *key)
+{
+	struct btrfs_path *path;
+	struct btrfs_key search_key;
+	struct btrfs_key found_key;
+	struct extent_buffer *l;
+	int ret;
+	int slot;
+
+	search_key.objectid = objectid;
+	search_key.type = (u8)-1;
+	search_key.offset = (u64)-1;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	BUG_ON(ret == 0);
+	l = path->nodes[0];
+	BUG_ON(path->slots[0] == 0);
+	slot = path->slots[0] - 1;
+	btrfs_item_key_to_cpu(l, &found_key, slot);
+	if (found_key.objectid != objectid) {
+		ret = 1;
+		goto out;
+	}
+	read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+			   sizeof(*item));
+	memcpy(key, &found_key, sizeof(found_key));
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * copy the data in 'item' into the btree
+ */
+int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *l;
+	int ret;
+	int slot;
+	unsigned long ptr;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+
+	if (ret != 0) {
+		btrfs_print_leaf(root, path->nodes[0]);
+		printk("unable to update root key %Lu %u %Lu\n",
+		       key->objectid, key->type, key->offset);
+		BUG_ON(1);
+	}
+
+	l = path->nodes[0];
+	slot = path->slots[0];
+	ptr = btrfs_item_ptr_offset(l, slot);
+	write_extent_buffer(l, item, ptr, sizeof(*item));
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_key *key, struct btrfs_root_item
+		      *item)
+{
+	int ret;
+	ret = btrfs_insert_item(trans, root, key, item, sizeof(*item));
+	return ret;
+}
+
+/*
+ * at mount time we want to find all the old transaction snapshots that were in
+ * the process of being deleted if we crashed.  This is any root item with an offset
+ * lower than the latest root.  They need to be queued for deletion to finish
+ * what was happening when we crashed.
+ */
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
+			  struct btrfs_root *latest)
+{
+	struct btrfs_root *dead_root;
+	struct btrfs_item *item;
+	struct btrfs_root_item *ri;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_path *path;
+	int ret;
+	u32 nritems;
+	struct extent_buffer *leaf;
+	int slot;
+
+	key.objectid = objectid;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	key.offset = 0;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+again:
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto err;
+	while(1) {
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		slot = path->slots[0];
+		if (slot >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret)
+				break;
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+			slot = path->slots[0];
+		}
+		item = btrfs_item_nr(leaf, slot);
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
+			goto next;
+
+		if (key.objectid < objectid)
+			goto next;
+
+		if (key.objectid > objectid)
+			break;
+
+		ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item);
+		if (btrfs_disk_root_refs(leaf, ri) != 0)
+			goto next;
+
+		memcpy(&found_key, &key, sizeof(key));
+		key.offset++;
+		btrfs_release_path(root, path);
+		dead_root =
+			btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
+						    &found_key);
+		if (IS_ERR(dead_root)) {
+			ret = PTR_ERR(dead_root);
+			goto err;
+		}
+
+		if (objectid == BTRFS_TREE_RELOC_OBJECTID)
+			ret = btrfs_add_dead_reloc_root(dead_root);
+		else
+			ret = btrfs_add_dead_root(dead_root, latest);
+		if (ret)
+			goto err;
+		goto again;
+next:
+		slot++;
+		path->slots[0]++;
+	}
+	ret = 0;
+err:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/* drop the root item for 'key' from 'root' */
+int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		   struct btrfs_key *key)
+{
+	struct btrfs_path *path;
+	int ret;
+	u32 refs;
+	struct btrfs_root_item *ri;
+	struct extent_buffer *leaf;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	ret = btrfs_search_slot(trans, root, key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+	if (ret) {
+		btrfs_print_leaf(root, path->nodes[0]);
+		printk("failed to del %Lu %u %Lu\n",
+		       key->objectid, key->type, key->offset);
+	}
+	BUG_ON(ret != 0);
+	leaf = path->nodes[0];
+	ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
+
+	refs = btrfs_disk_root_refs(leaf, ri);
+	BUG_ON(refs != 0);
+	ret = btrfs_del_item(trans, root, path);
+out:
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
+	return ret;
+}
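
btrfs_find_last_root() above works because all root items for one objectid sort together in the root tree, ordered by key offset, so searching for (objectid, (u8)-1, (u64)-1) and stepping back one slot lands on the newest item. A hedged example of calling it for the default filesystem tree; BTRFS_FS_TREE_OBJECTID comes from ctree.h and tree_root stands for root->fs_info->tree_root:

	struct btrfs_root_item item;
	struct btrfs_key key;
	int ret;

	ret = btrfs_find_last_root(tree_root, BTRFS_FS_TREE_OBJECTID,
				   &item, &key);
	if (ret == 0)
		printk(KERN_INFO "newest fs root has offset %llu\n",
		       (unsigned long long)key.offset);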

+ 132 - 0
fs/btrfs/struct-funcs.c

@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/highmem.h>
+
+/* this is some deeply nasty code.  ctree.h has a different
+ * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
+ *
+ * The end result is that anyone who #includes ctree.h gets a
+ * declaration for the btrfs_set_foo functions and btrfs_foo functions
+ *
+ * This file declares the macros and then #includes ctree.h, which results
+ * in cpp creating the function here based on the template below.
+ *
+ * These setget functions do all the extent_buffer related mapping
+ * required to efficiently read and write specific fields in the extent
+ * buffers.  Every pointer to metadata items in btrfs is really just
+ * an unsigned long offset into the extent buffer which has been
+ * cast to a specific type.  This gives us all the gcc type checking.
+ *
+ * The extent buffer api is used to do all the kmapping and page
+ * spanning work required to get extent buffers in highmem and have
+ * a metadata blocksize different from the page size.
+ */
+
+#define BTRFS_SETGET_FUNCS(name, type, member, bits)			\
+u##bits btrfs_##name(struct extent_buffer *eb,				\
+				   type *s)				\
+{									\
+	unsigned long part_offset = (unsigned long)s;			\
+	unsigned long offset = part_offset + offsetof(type, member);	\
+	type *p;							\
+	/* ugly, but we want the fast path here */			\
+	if (eb->map_token && offset >= eb->map_start &&			\
+	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
+	    eb->map_len) {						\
+		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
+		return le##bits##_to_cpu(p->member);			\
+	}								\
+	{								\
+		int err;						\
+		char *map_token;					\
+		char *kaddr;						\
+		int unmap_on_exit = (eb->map_token == NULL);		\
+		unsigned long map_start;				\
+		unsigned long map_len;					\
+		__le##bits res;						\
+		err = map_extent_buffer(eb, offset,			\
+			        sizeof(((type *)0)->member),		\
+				&map_token, &kaddr,			\
+				&map_start, &map_len, KM_USER1);	\
+		if (err) {						\
+			read_eb_member(eb, s, type, member, &res);	\
+			return le##bits##_to_cpu(res);			\
+		}							\
+		p = (type *)(kaddr + part_offset - map_start);		\
+		res = le##bits##_to_cpu(p->member);			\
+		if (unmap_on_exit)					\
+			unmap_extent_buffer(eb, map_token, KM_USER1);	\
+		return res;						\
+	}								\
+}									\
+void btrfs_set_##name(struct extent_buffer *eb,				\
+				    type *s, u##bits val)		\
+{									\
+	unsigned long part_offset = (unsigned long)s;			\
+	unsigned long offset = part_offset + offsetof(type, member);	\
+	type *p;							\
+	/* ugly, but we want the fast path here */			\
+	if (eb->map_token && offset >= eb->map_start &&			\
+	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
+	    eb->map_len) {						\
+		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
+		p->member = cpu_to_le##bits(val);			\
+		return;							\
+	}								\
+	{								\
+		int err;						\
+		char *map_token;					\
+		char *kaddr;						\
+		int unmap_on_exit = (eb->map_token == NULL);		\
+		unsigned long map_start;				\
+		unsigned long map_len;					\
+		err = map_extent_buffer(eb, offset,			\
+			        sizeof(((type *)0)->member),		\
+				&map_token, &kaddr,			\
+				&map_start, &map_len, KM_USER1);	\
+		if (err) {						\
+			val = cpu_to_le##bits(val);			\
+			write_eb_member(eb, s, type, member, &val);	\
+			return;						\
+		}							\
+		p = (type *)(kaddr + part_offset - map_start);		\
+		p->member = cpu_to_le##bits(val);			\
+		if (unmap_on_exit)					\
+			unmap_extent_buffer(eb, map_token, KM_USER1);	\
+	}								\
+}
+
+#include "ctree.h"
+
+void btrfs_node_key(struct extent_buffer *eb,
+		    struct btrfs_disk_key *disk_key, int nr)
+{
+	unsigned long ptr = btrfs_node_key_ptr_offset(nr);
+	if (eb->map_token && ptr >= eb->map_start &&
+	    ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
+		memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
+			sizeof(*disk_key));
+		return;
+	} else if (eb->map_token) {
+		unmap_extent_buffer(eb, eb->map_token, KM_USER1);
+		eb->map_token = NULL;
+	}
+	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+		       struct btrfs_key_ptr, key, disk_key);
+}
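
ctree.h invokes BTRFS_SETGET_FUNCS() once per on-disk field, and each invocation expands into one getter/setter pair built from the template above. Assuming an invocation such as BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 64), the generated pair has roughly this shape (declarations shown for illustration):

	u64 btrfs_disk_root_refs(struct extent_buffer *eb,
				 struct btrfs_root_item *s);
	void btrfs_set_disk_root_refs(struct extent_buffer *eb,
				      struct btrfs_root_item *s, u64 val);

root-tree.c above uses the getter as refs = btrfs_disk_root_refs(leaf, ri).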

+ 659 - 0
fs/btrfs/super.c

@@ -0,0 +1,659 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/mount.h>
+#include <linux/mpage.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include <linux/compat.h>
+#include <linux/parser.h>
+#include <linux/ctype.h>
+#include <linux/namei.h>
+#include <linux/miscdevice.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+#include "ioctl.h"
+#include "print-tree.h"
+#include "xattr.h"
+#include "volumes.h"
+#include "version.h"
+#include "export.h"
+
+#define BTRFS_SUPER_MAGIC 0x9123683E
+
+static struct super_operations btrfs_super_ops;
+
+static void btrfs_put_super (struct super_block * sb)
+{
+	struct btrfs_root *root = btrfs_sb(sb);
+	struct btrfs_fs_info *fs = root->fs_info;
+	int ret;
+
+	ret = close_ctree(root);
+	if (ret) {
+		printk("close ctree returns %d\n", ret);
+	}
+	btrfs_sysfs_del_super(fs);
+	sb->s_fs_info = NULL;
+}
+
+enum {
+	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
+	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
+	Opt_ssd, Opt_thread_pool, Opt_noacl,  Opt_err,
+};
+
+static match_table_t tokens = {
+	{Opt_degraded, "degraded"},
+	{Opt_subvol, "subvol=%s"},
+	{Opt_device, "device=%s"},
+	{Opt_nodatasum, "nodatasum"},
+	{Opt_nodatacow, "nodatacow"},
+	{Opt_nobarrier, "nobarrier"},
+	{Opt_max_extent, "max_extent=%s"},
+	{Opt_max_inline, "max_inline=%s"},
+	{Opt_alloc_start, "alloc_start=%s"},
+	{Opt_thread_pool, "thread_pool=%d"},
+	{Opt_ssd, "ssd"},
+	{Opt_noacl, "noacl"},
+	{Opt_err, NULL},
+};
+
+u64 btrfs_parse_size(char *str)
+{
+	u64 res;
+	int mult = 1;
+	char *end;
+	char last;
+
+	res = simple_strtoul(str, &end, 10);
+
+	last = end[0];
+	if (isalpha(last)) {
+		last = tolower(last);
+		switch (last) {
+		case 'g':
+			mult *= 1024;
+		case 'm':
+			mult *= 1024;
+		case 'k':
+			mult *= 1024;
+		}
+		res = res * mult;
+	}
+	return res;
+}
+
+/*
+ * Regular mount options parser.  Everything that is needed only when
+ * reading in a new superblock is parsed here.
+ */
+int btrfs_parse_options(struct btrfs_root *root, char *options)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	substring_t args[MAX_OPT_ARGS];
+	char *p, *num;
+	int intarg;
+
+	if (!options)
+		return 0;
+
+	/*
+	 * strsep changes the string, duplicate it because parse_options
+	 * gets called twice
+	 */
+	options = kstrdup(options, GFP_NOFS);
+	if (!options)
+		return -ENOMEM;
+
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_degraded:
+			printk(KERN_INFO "btrfs: allowing degraded mounts\n");
+			btrfs_set_opt(info->mount_opt, DEGRADED);
+			break;
+		case Opt_subvol:
+		case Opt_device:
+			/*
+			 * These are parsed by btrfs_parse_early_options
+			 * and can be happily ignored here.
+			 */
+			break;
+		case Opt_nodatasum:
+			printk(KERN_INFO "btrfs: setting nodatacsum\n");
+			btrfs_set_opt(info->mount_opt, NODATASUM);
+			break;
+		case Opt_nodatacow:
+			printk(KERN_INFO "btrfs: setting nodatacow\n");
+			btrfs_set_opt(info->mount_opt, NODATACOW);
+			btrfs_set_opt(info->mount_opt, NODATASUM);
+			break;
+		case Opt_ssd:
+			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
+			btrfs_set_opt(info->mount_opt, SSD);
+			break;
+		case Opt_nobarrier:
+			printk(KERN_INFO "btrfs: turning off barriers\n");
+			btrfs_set_opt(info->mount_opt, NOBARRIER);
+			break;
+		case Opt_thread_pool:
+			intarg = 0;
+			match_int(&args[0], &intarg);
+			if (intarg) {
+				info->thread_pool_size = intarg;
+				printk(KERN_INFO "btrfs: thread pool %d\n",
+				       info->thread_pool_size);
+			}
+			break;
+		case Opt_max_extent:
+			num = match_strdup(&args[0]);
+			if (num) {
+				info->max_extent = btrfs_parse_size(num);
+				kfree(num);
+
+				info->max_extent = max_t(u64,
+					info->max_extent, root->sectorsize);
+				printk(KERN_INFO "btrfs: max_extent at %llu\n",
+				       info->max_extent);
+			}
+			break;
+		case Opt_max_inline:
+			num = match_strdup(&args[0]);
+			if (num) {
+				info->max_inline = btrfs_parse_size(num);
+				kfree(num);
+
+				if (info->max_inline) {
+					info->max_inline = max_t(u64,
+						info->max_inline,
+						root->sectorsize);
+				}
+				printk(KERN_INFO "btrfs: max_inline at %llu\n",
+					info->max_inline);
+			}
+			break;
+		case Opt_alloc_start:
+			num = match_strdup(&args[0]);
+			if (num) {
+				info->alloc_start = btrfs_parse_size(num);
+				kfree(num);
+				printk(KERN_INFO
+					"btrfs: allocations start at %llu\n",
+					info->alloc_start);
+			}
+			break;
+		case Opt_noacl:
+			root->fs_info->sb->s_flags &= ~MS_POSIXACL;
+			break;
+		default:
+			break;
+		}
+	}
+	kfree(options);
+	return 0;
+}
+
+/*
+ * Parse mount options that are required early in the mount process.
+ *
+ * All other options will be parsed much later in the mount process and
+ * only when we need to allocate a new super block.
+ */
+static int btrfs_parse_early_options(const char *options, int flags,
+		void *holder, char **subvol_name,
+		struct btrfs_fs_devices **fs_devices)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *opts, *p;
+	int error = 0;
+
+	if (!options)
+		goto out;
+
+	/*
+	 * strsep changes the string, duplicate it because parse_options
+	 * gets called twice
+	 */
+	opts = kstrdup(options, GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	while ((p = strsep(&opts, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_subvol:
+			*subvol_name = match_strdup(&args[0]);
+			break;
+		case Opt_device:
+			error = btrfs_scan_one_device(match_strdup(&args[0]),
+					flags, holder, fs_devices);
+			if (error)
+				goto out_free_opts;
+			break;
+		default:
+			break;
+		}
+	}
+
+ out_free_opts:
+	kfree(opts);
+ out:
+	/*
+	 * If no subvolume name is specified we use the default one.  Allocate
+	 * a copy of the string "default" here so that code later in the
+	 * mount path doesn't care if it's the default volume or another one.
+	 */
+	if (!*subvol_name) {
+		*subvol_name = kstrdup("default", GFP_KERNEL);
+		if (!*subvol_name)
+			return -ENOMEM;
+	}
+	return error;
+}
+
+static int btrfs_fill_super(struct super_block * sb,
+			    struct btrfs_fs_devices *fs_devices,
+			    void * data, int silent)
+{
+	struct inode * inode;
+	struct dentry * root_dentry;
+	struct btrfs_super_block *disk_super;
+	struct btrfs_root *tree_root;
+	struct btrfs_inode *bi;
+	int err;
+
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_magic = BTRFS_SUPER_MAGIC;
+	sb->s_op = &btrfs_super_ops;
+	sb->s_export_op = &btrfs_export_ops;
+	sb->s_xattr = btrfs_xattr_handlers;
+	sb->s_time_gran = 1;
+	sb->s_flags |= MS_POSIXACL;
+
+	tree_root = open_ctree(sb, fs_devices, (char *)data);
+
+	if (IS_ERR(tree_root)) {
+		printk("btrfs: open_ctree failed\n");
+		return PTR_ERR(tree_root);
+	}
+	sb->s_fs_info = tree_root;
+	disk_super = &tree_root->fs_info->super_copy;
+	inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
+				  tree_root);
+	if (!inode) {
+		err = -ENOMEM;
+		goto fail_close;
+	}
+
+	bi = BTRFS_I(inode);
+	bi->location.objectid = inode->i_ino;
+	bi->location.offset = 0;
+	bi->root = tree_root;
+
+	btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
+
+	if (inode->i_state & I_NEW) {
+		btrfs_read_locked_inode(inode);
+		unlock_new_inode(inode);
+	}
+
+	root_dentry = d_alloc_root(inode);
+	if (!root_dentry) {
+		iput(inode);
+		err = -ENOMEM;
+		goto fail_close;
+	}
+
+	/* this does the super kobj at the same time */
+	err = btrfs_sysfs_add_super(tree_root->fs_info);
+	if (err)
+		goto fail_close;
+
+	sb->s_root = root_dentry;
+
+	save_mount_options(sb, data);
+	return 0;
+
+fail_close:
+	close_ctree(tree_root);
+	return err;
+}
+
+int btrfs_sync_fs(struct super_block *sb, int wait)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	int ret;
+	root = btrfs_sb(sb);
+
+	sb->s_dirt = 0;
+	if (!wait) {
+		filemap_flush(root->fs_info->btree_inode->i_mapping);
+		return 0;
+	}
+	btrfs_clean_old_snapshots(root);
+	trans = btrfs_start_transaction(root, 1);
+	ret = btrfs_commit_transaction(trans, root);
+	sb->s_dirt = 0;
+	return ret;
+}
+
+static void btrfs_write_super(struct super_block *sb)
+{
+	sb->s_dirt = 0;
+}
+
+static int btrfs_test_super(struct super_block *s, void *data)
+{
+	struct btrfs_fs_devices *test_fs_devices = data;
+	struct btrfs_root *root = btrfs_sb(s);
+
+	return root->fs_info->fs_devices == test_fs_devices;
+}
+
+/*
+ * Find a superblock for the given device / mount point.
+ *
+ * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
+ *	  for multiple device setup.  Make sure to keep it in sync.
+ */
+static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
+		const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	char *subvol_name = NULL;
+	struct block_device *bdev = NULL;
+	struct super_block *s;
+	struct dentry *root;
+	struct btrfs_fs_devices *fs_devices = NULL;
+	int error = 0;
+
+	error = btrfs_parse_early_options(data, flags, fs_type,
+					  &subvol_name, &fs_devices);
+	if (error)
+		goto error;
+
+	error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices);
+	if (error)
+		goto error_free_subvol_name;
+
+	error = btrfs_open_devices(fs_devices, flags, fs_type);
+	if (error)
+		goto error_free_subvol_name;
+
+	bdev = fs_devices->latest_bdev;
+	s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+	if (IS_ERR(s))
+		goto error_s;
+
+	if (s->s_root) {
+		if ((flags ^ s->s_flags) & MS_RDONLY) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			error = -EBUSY;
+			goto error_bdev;
+		}
+
+	} else {
+		char b[BDEVNAME_SIZE];
+
+		s->s_flags = flags;
+		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
+		error = btrfs_fill_super(s, fs_devices, data,
+					 flags & MS_SILENT ? 1 : 0);
+		if (error) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			goto error;
+		}
+
+		btrfs_sb(s)->fs_info->bdev_holder = fs_type;
+		s->s_flags |= MS_ACTIVE;
+	}
+
+	if (!strcmp(subvol_name, "."))
+		root = dget(s->s_root);
+	else {
+		mutex_lock(&s->s_root->d_inode->i_mutex);
+		root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name));
+		mutex_unlock(&s->s_root->d_inode->i_mutex);
+		if (IS_ERR(root)) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			error = PTR_ERR(root);
+			goto error;
+		}
+		if (!root->d_inode) {
+			dput(root);
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			error = -ENXIO;
+			goto error;
+		}
+	}
+
+	mnt->mnt_sb = s;
+	mnt->mnt_root = root;
+
+	kfree(subvol_name);
+	return 0;
+
+error_s:
+	error = PTR_ERR(s);
+error_bdev:
+	btrfs_close_devices(fs_devices);
+error_free_subvol_name:
+	kfree(subvol_name);
+error:
+	return error;
+}
+
+static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct btrfs_root *root = btrfs_sb(dentry->d_sb);
+	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
+	int bits = dentry->d_sb->s_blocksize_bits;
+	__be32 *fsid = (__be32 *)root->fs_info->fsid;
+
+	buf->f_namelen = BTRFS_NAME_LEN;
+	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
+	buf->f_bfree = buf->f_blocks -
+		(btrfs_super_bytes_used(disk_super) >> bits);
+	buf->f_bavail = buf->f_bfree;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	buf->f_type = BTRFS_SUPER_MAGIC;
+	/* We treat it as constant endianness (it doesn't matter _which_)
+	   because we want the fsid to come out the same whether mounted 
+	   on a big-endian or little-endian host */
+	buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
+	buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
+	/* Mask in the root object ID too, to disambiguate subvols */
+	buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
+	buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
+
+	return 0;
+}
+
+static struct file_system_type btrfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "btrfs",
+	.get_sb		= btrfs_get_sb,
+	.kill_sb	= kill_anon_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
+/*
+ * used by btrfsctl to scan devices when no FS is mounted
+ */
+static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	struct btrfs_ioctl_vol_args *vol;
+	struct btrfs_fs_devices *fs_devices;
+	int ret = 0;
+	int len;
+
+	vol = kmalloc(sizeof(*vol), GFP_KERNEL);
+	if (!vol)
+		return -ENOMEM;
+	if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
+		ret = -EFAULT;
+		goto out;
+	}
+	len = strnlen(vol->name, BTRFS_PATH_NAME_MAX);
+	switch (cmd) {
+	case BTRFS_IOC_SCAN_DEV:
+		ret = btrfs_scan_one_device(vol->name, MS_RDONLY,
+					    &btrfs_fs_type, &fs_devices);
+		break;
+	}
+out:
+	kfree(vol);
+	return ret;
+}
+
+static void btrfs_write_super_lockfs(struct super_block *sb)
+{
+	struct btrfs_root *root = btrfs_sb(sb);
+	mutex_lock(&root->fs_info->transaction_kthread_mutex);
+	mutex_lock(&root->fs_info->cleaner_mutex);
+}
+
+static void btrfs_unlockfs(struct super_block *sb)
+{
+	struct btrfs_root *root = btrfs_sb(sb);
+	mutex_unlock(&root->fs_info->cleaner_mutex);
+	mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+}
+
+static struct super_operations btrfs_super_ops = {
+	.delete_inode	= btrfs_delete_inode,
+	.put_super	= btrfs_put_super,
+	.write_super	= btrfs_write_super,
+	.sync_fs	= btrfs_sync_fs,
+	.show_options	= generic_show_options,
+	.write_inode	= btrfs_write_inode,
+	.dirty_inode	= btrfs_dirty_inode,
+	.alloc_inode	= btrfs_alloc_inode,
+	.destroy_inode	= btrfs_destroy_inode,
+	.statfs		= btrfs_statfs,
+	.write_super_lockfs = btrfs_write_super_lockfs,
+	.unlockfs	= btrfs_unlockfs,
+};
+
+static const struct file_operations btrfs_ctl_fops = {
+	.unlocked_ioctl	 = btrfs_control_ioctl,
+	.compat_ioctl = btrfs_control_ioctl,
+	.owner	 = THIS_MODULE,
+};
+
+static struct miscdevice btrfs_misc = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "btrfs-control",
+	.fops		= &btrfs_ctl_fops
+};
+
+static int btrfs_interface_init(void)
+{
+	return misc_register(&btrfs_misc);
+}
+
+void btrfs_interface_exit(void)
+{
+	if (misc_deregister(&btrfs_misc) < 0)
+		printk("misc_deregister failed for control device\n");
+}
+
+static int __init init_btrfs_fs(void)
+{
+	int err;
+
+	err = btrfs_init_sysfs();
+	if (err)
+		return err;
+
+	err = btrfs_init_cachep();
+	if (err)
+		goto free_sysfs;
+
+	err = extent_io_init();
+	if (err)
+		goto free_cachep;
+
+	err = extent_map_init();
+	if (err)
+		goto free_extent_io;
+
+	err = btrfs_interface_init();
+	if (err)
+		goto free_extent_map;
+	err = register_filesystem(&btrfs_fs_type);
+	if (err)
+		goto unregister_ioctl;
+
+	printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
+	return 0;
+
+unregister_ioctl:
+	btrfs_interface_exit();
+free_extent_map:
+	extent_map_exit();
+free_extent_io:
+	extent_io_exit();
+free_cachep:
+	btrfs_destroy_cachep();
+free_sysfs:
+	btrfs_exit_sysfs();
+	return err;
+}
+
+static void __exit exit_btrfs_fs(void)
+{
+	btrfs_destroy_cachep();
+	extent_map_exit();
+	extent_io_exit();
+	btrfs_interface_exit();
+	unregister_filesystem(&btrfs_fs_type);
+	btrfs_exit_sysfs();
+	btrfs_cleanup_fs_uuids();
+}
+
+module_init(init_btrfs_fs)
+module_exit(exit_btrfs_fs)
+
+MODULE_LICENSE("GPL");
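
btrfs_parse_size() relies on deliberate switch fall-through: a 'g' suffix multiplies by 1024 three times, 'm' twice, and 'k' once. A short worked sketch (illustrative only, values chosen arbitrarily):

	char k[] = "128k", m[] = "128m", g[] = "2g";

	/* "128k" -> 131072, "128m" -> 134217728, "2g" -> 2147483648 */
	u64 a = btrfs_parse_size(k);
	u64 b = btrfs_parse_size(m);
	u64 c = btrfs_parse_size(g);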

+ 268 - 0
fs/btrfs/sysfs.c

@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+
+static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long)btrfs_root_used(&root->root_item));
+}
+
+static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long)btrfs_root_limit(&root->root_item));
+}
+
+static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
+}
+
+static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
+}
+
+static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
+}
+
+/* this is for root attrs (subvols/snapshots) */
+struct btrfs_root_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct btrfs_root *, char *);
+	ssize_t (*store)(struct btrfs_root *, const char *, size_t);
+};
+
+#define ROOT_ATTR(name, mode, show, store) \
+static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store)
+
+ROOT_ATTR(blocks_used,	0444,	root_blocks_used_show,	NULL);
+ROOT_ATTR(block_limit,	0644,	root_block_limit_show,	NULL);
+
+static struct attribute *btrfs_root_attrs[] = {
+	&btrfs_root_attr_blocks_used.attr,
+	&btrfs_root_attr_block_limit.attr,
+	NULL,
+};
+
+/* this is for super attrs (actual full fs) */
+struct btrfs_super_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct btrfs_fs_info *, char *);
+	ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
+};
+
+#define SUPER_ATTR(name, mode, show, store) \
+static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store)
+
+SUPER_ATTR(blocks_used,		0444,	super_blocks_used_show,		NULL);
+SUPER_ATTR(total_blocks,	0444,	super_total_blocks_show,	NULL);
+SUPER_ATTR(blocksize,		0444,	super_blocksize_show,		NULL);
+
+static struct attribute *btrfs_super_attrs[] = {
+	&btrfs_super_attr_blocks_used.attr,
+	&btrfs_super_attr_total_blocks.attr,
+	&btrfs_super_attr_blocksize.attr,
+	NULL,
+};
+
+static ssize_t btrfs_super_attr_show(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
+						super_kobj);
+	struct btrfs_super_attr *a = container_of(attr,
+						  struct btrfs_super_attr,
+						  attr);
+
+	return a->show ? a->show(fs, buf) : 0;
+}
+
+static ssize_t btrfs_super_attr_store(struct kobject *kobj,
+				     struct attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
+						super_kobj);
+	struct btrfs_super_attr *a = container_of(attr,
+						  struct btrfs_super_attr,
+						  attr);
+
+	return a->store ? a->store(fs, buf, len) : 0;
+}
+
+static ssize_t btrfs_root_attr_show(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
+						root_kobj);
+	struct btrfs_root_attr *a = container_of(attr,
+						 struct btrfs_root_attr,
+						 attr);
+
+	return a->show ? a->show(root, buf) : 0;
+}
+
+static ssize_t btrfs_root_attr_store(struct kobject *kobj,
+				     struct attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
+						root_kobj);
+	struct btrfs_root_attr *a = container_of(attr,
+						 struct btrfs_root_attr,
+						 attr);
+	return a->store ? a->store(root, buf, len) : 0;
+}
+
+static void btrfs_super_release(struct kobject *kobj)
+{
+	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
+						super_kobj);
+	complete(&fs->kobj_unregister);
+}
+
+static void btrfs_root_release(struct kobject *kobj)
+{
+	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
+						root_kobj);
+	complete(&root->kobj_unregister);
+}
+
+static struct sysfs_ops btrfs_super_attr_ops = {
+	.show	= btrfs_super_attr_show,
+	.store	= btrfs_super_attr_store,
+};
+
+static struct sysfs_ops btrfs_root_attr_ops = {
+	.show	= btrfs_root_attr_show,
+	.store	= btrfs_root_attr_store,
+};
+
+static struct kobj_type btrfs_root_ktype = {
+	.default_attrs	= btrfs_root_attrs,
+	.sysfs_ops	= &btrfs_root_attr_ops,
+	.release	= btrfs_root_release,
+};
+
+static struct kobj_type btrfs_super_ktype = {
+	.default_attrs	= btrfs_super_attrs,
+	.sysfs_ops	= &btrfs_super_attr_ops,
+	.release	= btrfs_super_release,
+};
+
+/* /sys/fs/btrfs/ entry */
+static struct kset *btrfs_kset;
+
+int btrfs_sysfs_add_super(struct btrfs_fs_info *fs)
+{
+	int error;
+	char *name;
+	char c;
+	int len = strlen(fs->sb->s_id) + 1;
+	int i;
+
+	name = kmalloc(len, GFP_NOFS);
+	if (!name) {
+		error = -ENOMEM;
+		goto fail;
+	}
+
+	for (i = 0; i < len - 1; i++) {
+		c = fs->sb->s_id[i];
+		if (c == '/' || c == '\\')
+			c = '!';
+		name[i] = c;
+	}
+	name[len - 1] = '\0';
+
+	fs->super_kobj.kset = btrfs_kset;
+	error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype,
+				     NULL, "%s", name);
+	if (error)
+		goto fail;
+
+	kfree(name);
+	return 0;
+
+fail:
+	kfree(name);
+	printk(KERN_ERR "btrfs: sysfs creation for super failed\n");
+	return error;
+}
+
+int btrfs_sysfs_add_root(struct btrfs_root *root)
+{
+	int error;
+
+	error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype,
+				     &root->fs_info->super_kobj,
+				     "%s", root->name);
+	if (error)
+		goto fail;
+
+	return 0;
+
+fail:
+	printk(KERN_ERR "btrfs: sysfs creation for root failed\n");
+	return error;
+}
+
+void btrfs_sysfs_del_root(struct btrfs_root *root)
+{
+	kobject_put(&root->root_kobj);
+	wait_for_completion(&root->kobj_unregister);
+}
+
+void btrfs_sysfs_del_super(struct btrfs_fs_info *fs)
+{
+	kobject_put(&fs->super_kobj);
+	wait_for_completion(&fs->kobj_unregister);
+}
+
+int btrfs_init_sysfs(void)
+{
+	btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
+	if (!btrfs_kset)
+		return -ENOMEM;
+	return 0;
+}
+
+void btrfs_exit_sysfs(void)
+{
+	kset_unregister(btrfs_kset);
+}
+
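
The ROOT_ATTR() and SUPER_ATTR() macros are thin wrappers around __ATTR(); each invocation defines one static attribute structure, and btrfs_init_sysfs() registers the "btrfs" kset under fs_kobj, so the attributes end up below /sys/fs/btrfs/<sanitized s_id>/. What one invocation expands to, roughly:

	/* SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL) becomes: */
	static struct btrfs_super_attr btrfs_super_attr_blocks_used =
		__ATTR(blocks_used, 0444, super_blocks_used_show, NULL);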

+ 1023 - 0
fs/btrfs/transaction.c

@@ -0,0 +1,1023 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/writeback.h>
+#include <linux/pagemap.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "locking.h"
+#include "ref-cache.h"
+#include "tree-log.h"
+
+static int total_trans = 0;
+extern struct kmem_cache *btrfs_trans_handle_cachep;
+extern struct kmem_cache *btrfs_transaction_cachep;
+
+#define BTRFS_ROOT_TRANS_TAG 0
+
+static noinline void put_transaction(struct btrfs_transaction *transaction)
+{
+	WARN_ON(transaction->use_count == 0);
+	transaction->use_count--;
+	if (transaction->use_count == 0) {
+		WARN_ON(total_trans == 0);
+		total_trans--;
+		list_del_init(&transaction->list);
+		memset(transaction, 0, sizeof(*transaction));
+		kmem_cache_free(btrfs_transaction_cachep, transaction);
+	}
+}
+
+/*
+ * either allocate a new transaction or hop into the existing one
+ */
+static noinline int join_transaction(struct btrfs_root *root)
+{
+	struct btrfs_transaction *cur_trans;
+	cur_trans = root->fs_info->running_transaction;
+	if (!cur_trans) {
+		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
+					     GFP_NOFS);
+		total_trans++;
+		BUG_ON(!cur_trans);
+		root->fs_info->generation++;
+		root->fs_info->last_alloc = 0;
+		root->fs_info->last_data_alloc = 0;
+		cur_trans->num_writers = 1;
+		cur_trans->num_joined = 0;
+		cur_trans->transid = root->fs_info->generation;
+		init_waitqueue_head(&cur_trans->writer_wait);
+		init_waitqueue_head(&cur_trans->commit_wait);
+		cur_trans->in_commit = 0;
+		cur_trans->blocked = 0;
+		cur_trans->use_count = 1;
+		cur_trans->commit_done = 0;
+		cur_trans->start_time = get_seconds();
+		INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
+		extent_io_tree_init(&cur_trans->dirty_pages,
+				     root->fs_info->btree_inode->i_mapping,
+				     GFP_NOFS);
+		spin_lock(&root->fs_info->new_trans_lock);
+		root->fs_info->running_transaction = cur_trans;
+		spin_unlock(&root->fs_info->new_trans_lock);
+	} else {
+		cur_trans->num_writers++;
+		cur_trans->num_joined++;
+	}
+
+	return 0;
+}
+
+/*
+ * this does all the record keeping required to make sure that a
+ * reference counted root is properly recorded in a given transaction.
+ * This is required so that the old root from before we joined the
+ * transaction is deleted when the transaction commits.
+ */
+noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
+{
+	struct btrfs_dirty_root *dirty;
+	u64 running_trans_id = root->fs_info->running_transaction->transid;
+	if (root->ref_cows && root->last_trans < running_trans_id) {
+		WARN_ON(root == root->fs_info->extent_root);
+		if (root->root_item.refs != 0) {
+			radix_tree_tag_set(&root->fs_info->fs_roots_radix,
+				   (unsigned long)root->root_key.objectid,
+				   BTRFS_ROOT_TRANS_TAG);
+
+			dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
+			BUG_ON(!dirty);
+			dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
+			BUG_ON(!dirty->root);
+			dirty->latest_root = root;
+			INIT_LIST_HEAD(&dirty->list);
+
+			root->commit_root = btrfs_root_node(root);
+
+			memcpy(dirty->root, root, sizeof(*root));
+			spin_lock_init(&dirty->root->node_lock);
+			spin_lock_init(&dirty->root->list_lock);
+			mutex_init(&dirty->root->objectid_mutex);
+			mutex_init(&dirty->root->log_mutex);
+			INIT_LIST_HEAD(&dirty->root->dead_list);
+			dirty->root->node = root->commit_root;
+			dirty->root->commit_root = NULL;
+
+			spin_lock(&root->list_lock);
+			list_add(&dirty->root->dead_list, &root->dead_list);
+			spin_unlock(&root->list_lock);
+
+			root->dirty_root = dirty;
+		} else {
+			WARN_ON(1);
+		}
+		root->last_trans = running_trans_id;
+	}
+	return 0;
+}
+
+/* wait for commit against the current transaction to become unblocked
+ * when this is done, it is safe to start a new transaction, but the current
+ * transaction might not be fully on disk.
+ */
+static void wait_current_trans(struct btrfs_root *root)
+{
+	struct btrfs_transaction *cur_trans;
+
+	cur_trans = root->fs_info->running_transaction;
+	if (cur_trans && cur_trans->blocked) {
+		DEFINE_WAIT(wait);
+		cur_trans->use_count++;
+		while(1) {
+			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+					TASK_UNINTERRUPTIBLE);
+			if (cur_trans->blocked) {
+				mutex_unlock(&root->fs_info->trans_mutex);
+				schedule();
+				mutex_lock(&root->fs_info->trans_mutex);
+				finish_wait(&root->fs_info->transaction_wait,
+					    &wait);
+			} else {
+				finish_wait(&root->fs_info->transaction_wait,
+					    &wait);
+				break;
+			}
+		}
+		put_transaction(cur_trans);
+	}
+}
+
+static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
+					     int num_blocks, int wait)
+{
+	struct btrfs_trans_handle *h =
+		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
+	int ret;
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (!root->fs_info->log_root_recovering &&
+	    ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
+		wait_current_trans(root);
+	ret = join_transaction(root);
+	BUG_ON(ret);
+
+	btrfs_record_root_in_trans(root);
+	h->transid = root->fs_info->running_transaction->transid;
+	h->transaction = root->fs_info->running_transaction;
+	h->blocks_reserved = num_blocks;
+	h->blocks_used = 0;
+	h->block_group = NULL;
+	h->alloc_exclude_nr = 0;
+	h->alloc_exclude_start = 0;
+	root->fs_info->running_transaction->use_count++;
+	mutex_unlock(&root->fs_info->trans_mutex);
+	return h;
+}
+
+struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
+						   int num_blocks)
+{
+	return start_transaction(root, num_blocks, 1);
+}
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
+						   int num_blocks)
+{
+	return start_transaction(root, num_blocks, 0);
+}
+
+struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
+							 int num_blocks)
+{
+	return start_transaction(r, num_blocks, 2);
+}
+
+/* wait for a transaction commit to be fully complete */
+static noinline int wait_for_commit(struct btrfs_root *root,
+				    struct btrfs_transaction *commit)
+{
+	DEFINE_WAIT(wait);
+	mutex_lock(&root->fs_info->trans_mutex);
+	while(!commit->commit_done) {
+		prepare_to_wait(&commit->commit_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (commit->commit_done)
+			break;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		schedule();
+		mutex_lock(&root->fs_info->trans_mutex);
+	}
+	mutex_unlock(&root->fs_info->trans_mutex);
+	finish_wait(&commit->commit_wait, &wait);
+	return 0;
+}
+
+/*
+ * rate limit against the drop_snapshot code.  This helps to slow down new operations
+ * if the drop_snapshot code isn't able to keep up.
+ */
+static void throttle_on_drops(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	int harder_count = 0;
+
+harder:
+	if (atomic_read(&info->throttles)) {
+		DEFINE_WAIT(wait);
+		int thr;
+		thr = atomic_read(&info->throttle_gen);
+
+		do {
+			prepare_to_wait(&info->transaction_throttle,
+					&wait, TASK_UNINTERRUPTIBLE);
+			if (!atomic_read(&info->throttles)) {
+				finish_wait(&info->transaction_throttle, &wait);
+				break;
+			}
+			schedule();
+			finish_wait(&info->transaction_throttle, &wait);
+		} while (thr == atomic_read(&info->throttle_gen));
+		harder_count++;
+
+		if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
+		    harder_count < 2)
+			goto harder;
+
+		if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
+		    harder_count < 10)
+			goto harder;
+
+		if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
+		    harder_count < 20)
+			goto harder;
+	}
+}
+
+void btrfs_throttle(struct btrfs_root *root)
+{
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (!root->fs_info->open_ioctl_trans)
+		wait_current_trans(root);
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	throttle_on_drops(root);
+}
+
+static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, int throttle)
+{
+	struct btrfs_transaction *cur_trans;
+	struct btrfs_fs_info *info = root->fs_info;
+
+	mutex_lock(&info->trans_mutex);
+	cur_trans = info->running_transaction;
+	WARN_ON(cur_trans != trans->transaction);
+	WARN_ON(cur_trans->num_writers < 1);
+	cur_trans->num_writers--;
+
+	if (waitqueue_active(&cur_trans->writer_wait))
+		wake_up(&cur_trans->writer_wait);
+	put_transaction(cur_trans);
+	mutex_unlock(&info->trans_mutex);
+	memset(trans, 0, sizeof(*trans));
+	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+	if (throttle)
+		throttle_on_drops(root);
+
+	return 0;
+}
+
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root)
+{
+	return __btrfs_end_transaction(trans, root, 0);
+}
+
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
+{
+	return __btrfs_end_transaction(trans, root, 1);
+}
+
+/*
+ * when btree blocks are allocated, they have some corresponding bits set for
+ * them in one of two extent_io trees.  This is used to make sure all of
+ * those extents are on disk for transaction or log commit
+ */
+int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+					struct extent_io_tree *dirty_pages)
+{
+	int ret;
+	int err = 0;
+	int werr = 0;
+	struct page *page;
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	u64 start = 0;
+	u64 end;
+	unsigned long index;
+
+	while(1) {
+		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+		while(start <= end) {
+			cond_resched();
+
+			index = start >> PAGE_CACHE_SHIFT;
+			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+			page = find_get_page(btree_inode->i_mapping, index);
+			if (!page)
+				continue;
+
+			btree_lock_page_hook(page);
+			if (!page->mapping) {
+				unlock_page(page);
+				page_cache_release(page);
+				continue;
+			}
+
+			if (PageWriteback(page)) {
+				if (PageDirty(page))
+					wait_on_page_writeback(page);
+				else {
+					unlock_page(page);
+					page_cache_release(page);
+					continue;
+				}
+			}
+			err = write_one_page(page, 0);
+			if (err)
+				werr = err;
+			page_cache_release(page);
+		}
+	}
+	while(1) {
+		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
+		while(start <= end) {
+			index = start >> PAGE_CACHE_SHIFT;
+			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+			page = find_get_page(btree_inode->i_mapping, index);
+			if (!page)
+				continue;
+			if (PageDirty(page)) {
+				btree_lock_page_hook(page);
+				wait_on_page_writeback(page);
+				err = write_one_page(page, 0);
+				if (err)
+					werr = err;
+			}
+			wait_on_page_writeback(page);
+			page_cache_release(page);
+			cond_resched();
+		}
+	}
+	if (err)
+		werr = err;
+	return werr;
+}
+
+int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root)
+{
+	if (!trans || !trans->transaction) {
+		struct inode *btree_inode;
+		btree_inode = root->fs_info->btree_inode;
+		return filemap_write_and_wait(btree_inode->i_mapping);
+	}
+	return btrfs_write_and_wait_marked_extents(root,
+					   &trans->transaction->dirty_pages);
+}
+
+/*
+ * this is used to update the root pointer in the tree of tree roots.
+ *
+ * But, in the case of the extent allocation tree, updating the root
+ * pointer may allocate blocks which may change the root of the extent
+ * allocation tree.
+ *
+ * So, this loops and repeats and makes sure the cowonly root didn't
+ * change while the root pointer was being updated in the metadata.
+ */
+static int update_cowonly_root(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	int ret;
+	u64 old_root_bytenr;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
+
+	btrfs_write_dirty_block_groups(trans, root);
+	while(1) {
+		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
+		if (old_root_bytenr == root->node->start)
+			break;
+		btrfs_set_root_bytenr(&root->root_item,
+				       root->node->start);
+		btrfs_set_root_level(&root->root_item,
+				     btrfs_header_level(root->node));
+		ret = btrfs_update_root(trans, tree_root,
+					&root->root_key,
+					&root->root_item);
+		BUG_ON(ret);
+		btrfs_write_dirty_block_groups(trans, root);
+	}
+	return 0;
+}
+
+/*
+ * update all the cowonly tree roots on disk
+ */
+int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct list_head *next;
+
+	while(!list_empty(&fs_info->dirty_cowonly_roots)) {
+		next = fs_info->dirty_cowonly_roots.next;
+		list_del_init(next);
+		root = list_entry(next, struct btrfs_root, dirty_list);
+		update_cowonly_root(trans, root);
+	}
+	return 0;
+}
+
+/*
+ * dead roots are old snapshots that need to be deleted.  This allocates
+ * a dirty root struct and adds it into the list of dead roots that need to
+ * be deleted
+ */
+int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
+{
+	struct btrfs_dirty_root *dirty;
+
+	dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
+	if (!dirty)
+		return -ENOMEM;
+	dirty->root = root;
+	dirty->latest_root = latest;
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	list_add(&dirty->list, &latest->fs_info->dead_roots);
+	mutex_unlock(&root->fs_info->trans_mutex);
+	return 0;
+}
+
+/*
+ * at transaction commit time we need to schedule the old roots for
+ * deletion via btrfs_drop_snapshot.  This runs through all the
+ * reference counted roots that were modified in the current
+ * transaction and puts them into the drop list
+ */
+static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
+				    struct radix_tree_root *radix,
+				    struct list_head *list)
+{
+	struct btrfs_dirty_root *dirty;
+	struct btrfs_root *gang[8];
+	struct btrfs_root *root;
+	int i;
+	int ret;
+	int err = 0;
+	u32 refs;
+
+	while(1) {
+		ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
+						 ARRAY_SIZE(gang),
+						 BTRFS_ROOT_TRANS_TAG);
+		if (ret == 0)
+			break;
+		for (i = 0; i < ret; i++) {
+			root = gang[i];
+			radix_tree_tag_clear(radix,
+				     (unsigned long)root->root_key.objectid,
+				     BTRFS_ROOT_TRANS_TAG);
+
+			BUG_ON(!root->ref_tree);
+			dirty = root->dirty_root;
+
+			btrfs_free_log(trans, root);
+			btrfs_free_reloc_root(root);
+
+			if (root->commit_root == root->node) {
+				WARN_ON(root->node->start !=
+					btrfs_root_bytenr(&root->root_item));
+
+				free_extent_buffer(root->commit_root);
+				root->commit_root = NULL;
+				root->dirty_root = NULL;
+
+				spin_lock(&root->list_lock);
+				list_del_init(&dirty->root->dead_list);
+				spin_unlock(&root->list_lock);
+
+				kfree(dirty->root);
+				kfree(dirty);
+
+				/* make sure to update the root on disk
+				 * so we get any updates to the block used
+				 * counts
+				 */
+				err = btrfs_update_root(trans,
+						root->fs_info->tree_root,
+						&root->root_key,
+						&root->root_item);
+				continue;
+			}
+
+			memset(&root->root_item.drop_progress, 0,
+			       sizeof(struct btrfs_disk_key));
+			root->root_item.drop_level = 0;
+			root->commit_root = NULL;
+			root->dirty_root = NULL;
+			root->root_key.offset = root->fs_info->generation;
+			btrfs_set_root_bytenr(&root->root_item,
+					      root->node->start);
+			btrfs_set_root_level(&root->root_item,
+					     btrfs_header_level(root->node));
+			err = btrfs_insert_root(trans, root->fs_info->tree_root,
+						&root->root_key,
+						&root->root_item);
+			if (err)
+				break;
+
+			refs = btrfs_root_refs(&dirty->root->root_item);
+			btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
+			err = btrfs_update_root(trans, root->fs_info->tree_root,
+						&dirty->root->root_key,
+						&dirty->root->root_item);
+
+			BUG_ON(err);
+			if (refs == 1) {
+				list_add(&dirty->list, list);
+			} else {
+				WARN_ON(1);
+				free_extent_buffer(dirty->root->node);
+				kfree(dirty->root);
+				kfree(dirty);
+			}
+		}
+	}
+	return err;
+}
+
+/*
+ * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
+ * otherwise every leaf in the btree is read and defragged.
+ */
+int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	int ret;
+	struct btrfs_trans_handle *trans;
+	unsigned long nr;
+
+	smp_mb();
+	if (root->defrag_running)
+		return 0;
+	trans = btrfs_start_transaction(root, 1);
+	while (1) {
+		root->defrag_running = 1;
+		ret = btrfs_defrag_leaves(trans, root, cacheonly);
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, root);
+		btrfs_btree_balance_dirty(info->tree_root, nr);
+		cond_resched();
+
+		trans = btrfs_start_transaction(root, 1);
+		if (root->fs_info->closing || ret != -EAGAIN)
+			break;
+	}
+	root->defrag_running = 0;
+	smp_mb();
+	btrfs_end_transaction(trans, root);
+	return 0;
+}
+
+/*
+ * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
+ * all of them
+ */
+static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
+				     struct list_head *list)
+{
+	struct btrfs_dirty_root *dirty;
+	struct btrfs_trans_handle *trans;
+	unsigned long nr;
+	u64 num_bytes;
+	u64 bytes_used;
+	u64 max_useless;
+	int ret = 0;
+	int err;
+
+	while(!list_empty(list)) {
+		struct btrfs_root *root;
+
+		dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
+		list_del_init(&dirty->list);
+
+		num_bytes = btrfs_root_used(&dirty->root->root_item);
+		root = dirty->latest_root;
+		atomic_inc(&root->fs_info->throttles);
+
+		while(1) {
+			trans = btrfs_start_transaction(tree_root, 1);
+			mutex_lock(&root->fs_info->drop_mutex);
+			ret = btrfs_drop_snapshot(trans, dirty->root);
+			if (ret != -EAGAIN) {
+				break;
+			}
+			mutex_unlock(&root->fs_info->drop_mutex);
+
+			err = btrfs_update_root(trans,
+					tree_root,
+					&dirty->root->root_key,
+					&dirty->root->root_item);
+			if (err)
+				ret = err;
+			nr = trans->blocks_used;
+			ret = btrfs_end_transaction(trans, tree_root);
+			BUG_ON(ret);
+
+			btrfs_btree_balance_dirty(tree_root, nr);
+			cond_resched();
+		}
+		BUG_ON(ret);
+		atomic_dec(&root->fs_info->throttles);
+		wake_up(&root->fs_info->transaction_throttle);
+
+		mutex_lock(&root->fs_info->alloc_mutex);
+		num_bytes -= btrfs_root_used(&dirty->root->root_item);
+		bytes_used = btrfs_root_used(&root->root_item);
+		if (num_bytes) {
+			btrfs_record_root_in_trans(root);
+			btrfs_set_root_used(&root->root_item,
+					    bytes_used - num_bytes);
+		}
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
+		ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
+		if (ret) {
+			BUG();
+			break;
+		}
+		mutex_unlock(&root->fs_info->drop_mutex);
+
+		spin_lock(&root->list_lock);
+		list_del_init(&dirty->root->dead_list);
+		if (!list_empty(&root->dead_list)) {
+			struct btrfs_root *oldest;
+			oldest = list_entry(root->dead_list.prev,
+					    struct btrfs_root, dead_list);
+			max_useless = oldest->root_key.offset - 1;
+		} else {
+			max_useless = root->root_key.offset - 1;
+		}
+		spin_unlock(&root->list_lock);
+
+		nr = trans->blocks_used;
+		ret = btrfs_end_transaction(trans, tree_root);
+		BUG_ON(ret);
+
+		ret = btrfs_remove_leaf_refs(root, max_useless, 0);
+		BUG_ON(ret);
+
+		free_extent_buffer(dirty->root->node);
+		kfree(dirty->root);
+		kfree(dirty);
+
+		btrfs_btree_balance_dirty(tree_root, nr);
+		cond_resched();
+	}
+	return ret;
+}
+
+/*
+ * new snapshots need to be created at a very specific time in the
+ * transaction commit.  This does the actual creation
+ */
+static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+				   struct btrfs_fs_info *fs_info,
+				   struct btrfs_pending_snapshot *pending)
+{
+	struct btrfs_key key;
+	struct btrfs_root_item *new_root_item;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *root = pending->root;
+	struct extent_buffer *tmp;
+	struct extent_buffer *old;
+	int ret;
+	int namelen;
+	u64 objectid;
+
+	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
+	if (!new_root_item) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
+	if (ret)
+		goto fail;
+
+	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
+
+	key.objectid = objectid;
+	key.offset = trans->transid;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+
+	old = btrfs_lock_root_node(root);
+	btrfs_cow_block(trans, root, old, NULL, 0, &old, 0);
+
+	btrfs_copy_root(trans, root, old, &tmp, objectid);
+	btrfs_tree_unlock(old);
+	free_extent_buffer(old);
+
+	btrfs_set_root_bytenr(new_root_item, tmp->start);
+	btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+				new_root_item);
+	btrfs_tree_unlock(tmp);
+	free_extent_buffer(tmp);
+	if (ret)
+		goto fail;
+
+	/*
+	 * insert the directory item
+	 */
+	key.offset = (u64)-1;
+	namelen = strlen(pending->name);
+	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
+				    pending->name, namelen,
+				    root->fs_info->sb->s_root->d_inode->i_ino,
+				    &key, BTRFS_FT_DIR, 0);
+
+	if (ret)
+		goto fail;
+
+	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
+			     pending->name, strlen(pending->name), objectid,
+			     root->fs_info->sb->s_root->d_inode->i_ino, 0);
+
+	/* Invalidate existing dcache entry for new snapshot. */
+	btrfs_invalidate_dcache_root(root, pending->name, namelen);
+
+fail:
+	kfree(new_root_item);
+	return ret;
+}
+
+/*
+ * create all the snapshots we've scheduled for creation
+ */
+static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
+					     struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_pending_snapshot *pending;
+	struct list_head *head = &trans->transaction->pending_snapshots;
+	int ret;
+
+	while(!list_empty(head)) {
+		pending = list_entry(head->next,
+				     struct btrfs_pending_snapshot, list);
+		ret = create_pending_snapshot(trans, fs_info, pending);
+		BUG_ON(ret);
+		list_del(&pending->list);
+		kfree(pending->name);
+		kfree(pending);
+	}
+	return 0;
+}
+
+int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root)
+{
+	unsigned long joined = 0;
+	unsigned long timeout = 1;
+	struct btrfs_transaction *cur_trans;
+	struct btrfs_transaction *prev_trans = NULL;
+	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
+	struct list_head dirty_fs_roots;
+	struct extent_io_tree *pinned_copy;
+	DEFINE_WAIT(wait);
+	int ret;
+
+	INIT_LIST_HEAD(&dirty_fs_roots);
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (trans->transaction->in_commit) {
+		cur_trans = trans->transaction;
+		trans->transaction->use_count++;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		btrfs_end_transaction(trans, root);
+
+		ret = wait_for_commit(root, cur_trans);
+		BUG_ON(ret);
+
+		mutex_lock(&root->fs_info->trans_mutex);
+		put_transaction(cur_trans);
+		mutex_unlock(&root->fs_info->trans_mutex);
+
+		return 0;
+	}
+
+	pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
+	if (!pinned_copy)
+		return -ENOMEM;
+
+	extent_io_tree_init(pinned_copy,
+			     root->fs_info->btree_inode->i_mapping, GFP_NOFS);
+
+	trans->transaction->in_commit = 1;
+	trans->transaction->blocked = 1;
+	cur_trans = trans->transaction;
+	if (cur_trans->list.prev != &root->fs_info->trans_list) {
+		prev_trans = list_entry(cur_trans->list.prev,
+					struct btrfs_transaction, list);
+		if (!prev_trans->commit_done) {
+			prev_trans->use_count++;
+			mutex_unlock(&root->fs_info->trans_mutex);
+
+			wait_for_commit(root, prev_trans);
+
+			mutex_lock(&root->fs_info->trans_mutex);
+			put_transaction(prev_trans);
+		}
+	}
+
+	do {
+		int snap_pending = 0;
+		joined = cur_trans->num_joined;
+		if (!list_empty(&trans->transaction->pending_snapshots))
+			snap_pending = 1;
+
+		WARN_ON(cur_trans != trans->transaction);
+		prepare_to_wait(&cur_trans->writer_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+
+		if (cur_trans->num_writers > 1)
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		else
+			timeout = 1;
+
+		mutex_unlock(&root->fs_info->trans_mutex);
+
+		if (snap_pending) {
+			ret = btrfs_wait_ordered_extents(root, 1);
+			BUG_ON(ret);
+		}
+
+		schedule_timeout(timeout);
+
+		mutex_lock(&root->fs_info->trans_mutex);
+		finish_wait(&cur_trans->writer_wait, &wait);
+	} while (cur_trans->num_writers > 1 ||
+		 (cur_trans->num_joined != joined));
+
+	ret = create_pending_snapshots(trans, root->fs_info);
+	BUG_ON(ret);
+
+	WARN_ON(cur_trans != trans->transaction);
+
+	/* btrfs_commit_tree_roots is responsible for getting the
+	 * various roots consistent with each other.  Every pointer
+	 * in the tree of tree roots has to point to the most up to date
+	 * root for every subvolume and other tree.  So, we have to keep
+	 * the tree logging code from jumping in and changing any
+	 * of the trees.
+	 *
+	 * At this point in the commit, there can't be any tree-log
+	 * writers, but a little lower down we drop the trans mutex
+	 * and let new people in.  By holding the tree_log_mutex
+	 * from now until after the super is written, we avoid races
+	 * with the tree-log code.
+	 */
+	mutex_lock(&root->fs_info->tree_log_mutex);
+	/*
+	 * keep tree reloc code from adding new reloc trees
+	 */
+	mutex_lock(&root->fs_info->tree_reloc_mutex);
+
+
+	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
+			      &dirty_fs_roots);
+	BUG_ON(ret);
+
+	/* add_dirty_roots gets rid of all the tree log roots, so it is now
+	 * safe to free the tree of log tree roots
+	 */
+	btrfs_free_log_root_tree(trans, root->fs_info);
+
+	btrfs_free_reloc_mappings(root);
+
+	ret = btrfs_commit_tree_roots(trans, root);
+	BUG_ON(ret);
+
+	cur_trans = root->fs_info->running_transaction;
+	spin_lock(&root->fs_info->new_trans_lock);
+	root->fs_info->running_transaction = NULL;
+	spin_unlock(&root->fs_info->new_trans_lock);
+	btrfs_set_super_generation(&root->fs_info->super_copy,
+				   cur_trans->transid);
+	btrfs_set_super_root(&root->fs_info->super_copy,
+			     root->fs_info->tree_root->node->start);
+	btrfs_set_super_root_level(&root->fs_info->super_copy,
+			   btrfs_header_level(root->fs_info->tree_root->node));
+
+	btrfs_set_super_chunk_root(&root->fs_info->super_copy,
+				   chunk_root->node->start);
+	btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
+					 btrfs_header_level(chunk_root->node));
+
+	if (!root->fs_info->log_root_recovering) {
+		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
+		btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
+	}
+
+	memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
+	       sizeof(root->fs_info->super_copy));
+
+	btrfs_copy_pinned(root, pinned_copy);
+
+	trans->transaction->blocked = 0;
+	wake_up(&root->fs_info->transaction_throttle);
+	wake_up(&root->fs_info->transaction_wait);
+
+	mutex_unlock(&root->fs_info->trans_mutex);
+	ret = btrfs_write_and_wait_transaction(trans, root);
+	BUG_ON(ret);
+	write_ctree_super(trans, root);
+
+	/*
+	 * the super is written, we can safely allow the tree-loggers
+	 * to go about their business
+	 */
+	mutex_unlock(&root->fs_info->tree_log_mutex);
+
+	btrfs_finish_extent_commit(trans, root, pinned_copy);
+	kfree(pinned_copy);
+
+	btrfs_drop_dead_reloc_roots(root);
+	mutex_unlock(&root->fs_info->tree_reloc_mutex);
+
+	mutex_lock(&root->fs_info->trans_mutex);
+
+	cur_trans->commit_done = 1;
+	root->fs_info->last_trans_committed = cur_trans->transid;
+	wake_up(&cur_trans->commit_wait);
+	put_transaction(cur_trans);
+	put_transaction(cur_trans);
+
+	list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
+	if (root->fs_info->closing)
+		list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
+
+	mutex_unlock(&root->fs_info->trans_mutex);
+	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+	if (root->fs_info->closing) {
+		drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
+	}
+	return ret;
+}
+
+/*
+ * interface function to delete all the snapshots we have scheduled for deletion
+ */
+int btrfs_clean_old_snapshots(struct btrfs_root *root)
+{
+	struct list_head dirty_roots;
+	INIT_LIST_HEAD(&dirty_roots);
+again:
+	mutex_lock(&root->fs_info->trans_mutex);
+	list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	if (!list_empty(&dirty_roots)) {
+		drop_dirty_roots(root, &dirty_roots);
+		goto again;
+	}
+	return 0;
+}
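btrfs_clean_old_snapshots() above uses a splice-and-drain pattern: under trans_mutex it steals the whole dead_roots list onto a private list, drops the lock, drops the roots, and loops in case new dead roots were queued in the meantime. A rough single-threaded sketch of the same pattern with a toy singly linked list (the kernel code uses list_splice_init() and real locking):

#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	struct node *next;
};

static struct node *dead_roots;		/* shared list, normally lock protected */

static void add_dead(int id)
{
	struct node *n = malloc(sizeof(*n));

	n->id = id;
	n->next = dead_roots;
	dead_roots = n;
}

int main(void)
{
	add_dead(1);
	add_dead(2);
	add_dead(3);

	for (;;) {
		/* "splice": steal the whole list under the lock */
		struct node *work = dead_roots;

		dead_roots = NULL;
		if (!work)
			break;		/* nothing new showed up, we're done */

		/* drain the private copy without holding the lock */
		while (work) {
			struct node *next = work->next;

			printf("dropping dead root %d\n", work->id);
			free(work);
			work = next;
		}
	}
	return 0;
}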

+ 104 - 0
fs/btrfs/transaction.h

@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_TRANSACTION__
+#define __BTRFS_TRANSACTION__
+#include "btrfs_inode.h"
+
+struct btrfs_transaction {
+	u64 transid;
+	unsigned long num_writers;
+	unsigned long num_joined;
+	int in_commit;
+	int use_count;
+	int commit_done;
+	int blocked;
+	struct list_head list;
+	struct extent_io_tree dirty_pages;
+	unsigned long start_time;
+	wait_queue_head_t writer_wait;
+	wait_queue_head_t commit_wait;
+	struct list_head pending_snapshots;
+};
+
+struct btrfs_trans_handle {
+	u64 transid;
+	unsigned long blocks_reserved;
+	unsigned long blocks_used;
+	struct btrfs_transaction *transaction;
+	struct btrfs_block_group_cache *block_group;
+	u64 alloc_exclude_start;
+	u64 alloc_exclude_nr;
+};
+
+struct btrfs_pending_snapshot {
+	struct btrfs_root *root;
+	char *name;
+	struct list_head list;
+};
+
+struct btrfs_dirty_root {
+	struct list_head list;
+	struct btrfs_root *root;
+	struct btrfs_root *latest_root;
+};
+
+static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
+					       struct inode *inode)
+{
+	trans->block_group = BTRFS_I(inode)->block_group;
+}
+
+static inline void btrfs_update_inode_block_group(struct
+						  btrfs_trans_handle *trans,
+						  struct inode *inode)
+{
+	BTRFS_I(inode)->block_group = trans->block_group;
+}
+
+static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
+					      struct inode *inode)
+{
+	BTRFS_I(inode)->last_trans = trans->transaction->transid;
+}
+
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root);
+struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
+						   int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
+						   int num_blocks);
+struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
+						   int num_blocks);
+int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root);
+int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root);
+
+int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
+int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
+int btrfs_clean_old_snapshots(struct btrfs_root *root);
+int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root);
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root);
+void btrfs_throttle(struct btrfs_root *root);
+int btrfs_record_root_in_trans(struct btrfs_root *root);
+int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+					struct extent_io_tree *dirty_pages);
+#endif
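struct btrfs_transaction above is reference counted through use_count; that is why btrfs_commit_transaction() in transaction.c calls put_transaction() twice, once for the committing handle's reference and once for the reference that fs_info->running_transaction held. A minimal userspace sketch of that get/put counting (toy struct, no locking, not the kernel helpers):

#include <stdio.h>
#include <stdlib.h>

struct transaction {
	int use_count;			/* how many holders still reference us */
	unsigned long transid;
};

static struct transaction *get_transaction(struct transaction *t)
{
	t->use_count++;
	return t;
}

static void put_transaction(struct transaction *t)
{
	if (--t->use_count == 0) {
		printf("transaction %lu freed\n", t->transid);
		free(t);
	}
}

int main(void)
{
	struct transaction *t = calloc(1, sizeof(*t));

	t->transid = 42;
	t->use_count = 1;		/* reference held as the running transaction */
	get_transaction(t);		/* reference held by the trans handle        */

	/* commit path: both holders drop their reference */
	put_transaction(t);
	put_transaction(t);
	return 0;
}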

+ 149 - 0
fs/btrfs/tree-defrag.c

@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+#include "transaction.h"
+#include "locking.h"
+
+/* defrag all the leaves in a given btree.  If cache_only == 1, don't read things
+ * from disk; otherwise read all the leaves and try to get key order to
+ * better reflect disk order
+ */
+int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, int cache_only)
+{
+	struct btrfs_path *path = NULL;
+	struct btrfs_key key;
+	int ret = 0;
+	int wret;
+	int level;
+	int orig_level;
+	int is_extent = 0;
+	int next_key_ret = 0;
+	u64 last_ret = 0;
+	u64 min_trans = 0;
+
+	if (cache_only)
+		goto out;
+
+	if (root->fs_info->extent_root == root) {
+		/*
+		 * there's recursion here right now in the tree locking,
+		 * we can't defrag the extent root without deadlock
+		 */
+		goto out;
+	}
+
+	if (root->ref_cows == 0 && !is_extent)
+		goto out;
+
+	if (btrfs_test_opt(root, SSD))
+		goto out;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	level = btrfs_header_level(root->node);
+	orig_level = level;
+
+	if (level == 0) {
+		goto out;
+	}
+	if (root->defrag_progress.objectid == 0) {
+		struct extent_buffer *root_node;
+		u32 nritems;
+
+		root_node = btrfs_lock_root_node(root);
+		nritems = btrfs_header_nritems(root_node);
+		root->defrag_max.objectid = 0;
+		/* from above we know this is not a leaf */
+		btrfs_node_key_to_cpu(root_node, &root->defrag_max,
+				      nritems - 1);
+		btrfs_tree_unlock(root_node);
+		free_extent_buffer(root_node);
+		memset(&key, 0, sizeof(key));
+	} else {
+		memcpy(&key, &root->defrag_progress, sizeof(key));
+	}
+
+	path->keep_locks = 1;
+	if (cache_only)
+		min_trans = root->defrag_trans_start;
+
+	ret = btrfs_search_forward(root, &key, NULL, path,
+				   cache_only, min_trans);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = 0;
+		goto out;
+	}
+	btrfs_release_path(root, path);
+	wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+
+	if (wret < 0) {
+		ret = wret;
+		goto out;
+	}
+	if (!path->nodes[1]) {
+		ret = 0;
+		goto out;
+	}
+	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+	next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only,
+					   min_trans);
+	ret = btrfs_realloc_node(trans, root,
+				 path->nodes[1], 0,
+				 cache_only, &last_ret,
+				 &root->defrag_progress);
+	WARN_ON(ret && ret != -EAGAIN);
+	if (next_key_ret == 0) {
+		memcpy(&root->defrag_progress, &key, sizeof(key));
+		ret = -EAGAIN;
+	}
+
+	btrfs_release_path(root, path);
+	if (is_extent)
+		btrfs_extent_post_op(trans, root);
+out:
+	if (is_extent)
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
+	if (path)
+		btrfs_free_path(path);
+	if (ret == -EAGAIN) {
+		if (root->defrag_max.objectid > root->defrag_progress.objectid)
+			goto done;
+		if (root->defrag_max.type > root->defrag_progress.type)
+			goto done;
+		if (root->defrag_max.offset > root->defrag_progress.offset)
+			goto done;
+		ret = 0;
+	}
+done:
+	if (ret != -EAGAIN) {
+		memset(&root->defrag_progress, 0,
+		       sizeof(root->defrag_progress));
+		root->defrag_trans_start = trans->transid;
+	}
+	return ret;
+}
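btrfs_defrag_leaves() is written to be restartable: it reallocates one node's worth of leaves, saves where it stopped in root->defrag_progress, and returns -EAGAIN so btrfs_defrag_root() can end the transaction, balance dirty pages, and call back in with the saved cursor. A simplified standalone sketch of that resume-with-cursor contract (hypothetical names; -EAGAIN reused only as a "call me again" code):

#include <errno.h>
#include <stdio.h>

#define TOTAL_KEYS	100
#define BATCH		8

static int progress;			/* like root->defrag_progress */

/* defrag one batch starting at the saved cursor; -EAGAIN means "more left" */
static int defrag_some(void)
{
	int end = progress + BATCH;

	if (end > TOTAL_KEYS)
		end = TOTAL_KEYS;

	printf("defragging keys %d..%d\n", progress, end - 1);
	progress = end;

	return progress < TOTAL_KEYS ? -EAGAIN : 0;
}

int main(void)
{
	int ret;

	/* the caller loops, doing other work (ending the transaction,
	 * balancing dirty pages, rescheduling) between batches
	 */
	do {
		ret = defrag_some();
	} while (ret == -EAGAIN);

	return ret;
}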

+ 2890 - 0
fs/btrfs/tree-log.c

@@ -0,0 +1,2890 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "locking.h"
+#include "print-tree.h"
+#include "compat.h"
+
+/* magic values for the inode_only field in btrfs_log_inode:
+ *
+ * LOG_INODE_ALL means to log everything
+ * LOG_INODE_EXISTS means to log just enough to recreate the inode
+ * during log replay
+ */
+#define LOG_INODE_ALL 0
+#define LOG_INODE_EXISTS 1
+
+/*
+ * stages for the tree walking.  The first
+ * stage (0) is to only pin down the blocks we find.
+ * The second stage (1) is to make sure that all the inodes
+ * we find in the log are created in the subvolume.
+ *
+ * The last stage is to deal with directories and links and extents
+ * and all the other fun semantics
+ */
+#define LOG_WALK_PIN_ONLY 0
+#define LOG_WALK_REPLAY_INODES 1
+#define LOG_WALK_REPLAY_ALL 2
+
+static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, struct inode *inode,
+			     int inode_only);
+
+/*
+ * tree logging is a special write ahead log used to make sure that
+ * fsyncs and O_SYNCs can happen without doing full tree commits.
+ *
+ * Full tree commits are expensive because they require commonly
+ * modified blocks to be recowed, creating many dirty pages in the
+ * extent tree and a 4x-6x higher write load than ext3.
+ *
+ * Instead of doing a tree commit on every fsync, we use the
+ * key ranges and transaction ids to find items for a given file or directory
+ * that have changed in this transaction.  Those items are copied into
+ * a special tree (one per subvolume root), that tree is written to disk
+ * and then the fsync is considered complete.
+ *
+ * After a crash, items are copied out of the log-tree back into the
+ * subvolume tree.  Any file data extents found are recorded in the extent
+ * allocation tree, and the log-tree freed.
+ *
+ * The log tree is read three times: once to pin down all the extents it
+ * is using in ram, once to create all the inodes logged in the tree, and
+ * once to do all the other items.
+ */
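As a standalone illustration of the idea in the comment above, here is a heavily simplified, userspace-only log-then-replay toy: changes are appended cheaply to a side "log", and only at replay time are they folded back into the main structure. Flat arrays stand in for btrfs trees; none of this is the actual implementation.

#include <stdio.h>

#define MAX_ITEMS 16

struct item {
	int key;
	int value;
};

/* "subvolume tree" and "log tree", both just flat arrays here */
static struct item tree[MAX_ITEMS];
static int tree_items;
static struct item log_tree[MAX_ITEMS];
static int log_items;

/* fsync path: record the change in the log only, no full tree commit */
static void log_change(int key, int value)
{
	log_tree[log_items].key = key;
	log_tree[log_items].value = value;
	log_items++;
}

/* crash recovery: copy logged items back into the subvolume tree */
static void replay_log(void)
{
	int i, j;

	for (i = 0; i < log_items; i++) {
		for (j = 0; j < tree_items; j++) {
			if (tree[j].key == log_tree[i].key)
				break;
		}
		tree[j] = log_tree[i];		/* overwrite or append */
		if (j == tree_items)
			tree_items++;
	}
	log_items = 0;				/* the log is then freed */
}

int main(void)
{
	log_change(1, 100);
	log_change(2, 200);
	log_change(1, 101);			/* later update to the same key */

	replay_log();

	for (int i = 0; i < tree_items; i++)
		printf("key %d = %d\n", tree[i].key, tree[i].value);
	return 0;
}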
+
+/*
+ * btrfs_add_log_tree adds a new per-subvolume log tree into the
+ * tree of log tree roots.  This must be called with a tree log transaction
+ * running (see start_log_trans).
+ */
+int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root)
+{
+	struct btrfs_key key;
+	struct btrfs_root_item root_item;
+	struct btrfs_inode_item *inode_item;
+	struct extent_buffer *leaf;
+	struct btrfs_root *new_root = root;
+	int ret;
+	u64 objectid = root->root_key.objectid;
+
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+				      BTRFS_TREE_LOG_OBJECTID,
+				      trans->transid, 0, 0, 0);
+	if (IS_ERR(leaf)) {
+		ret = PTR_ERR(leaf);
+		return ret;
+	}
+
+	btrfs_set_header_nritems(leaf, 0);
+	btrfs_set_header_level(leaf, 0);
+	btrfs_set_header_bytenr(leaf, leaf->start);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
+
+	write_extent_buffer(leaf, root->fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(leaf),
+			    BTRFS_FSID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+
+	inode_item = &root_item.inode;
+	memset(inode_item, 0, sizeof(*inode_item));
+	inode_item->generation = cpu_to_le64(1);
+	inode_item->size = cpu_to_le64(3);
+	inode_item->nlink = cpu_to_le32(1);
+	inode_item->nbytes = cpu_to_le64(root->leafsize);
+	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+
+	btrfs_set_root_bytenr(&root_item, leaf->start);
+	btrfs_set_root_level(&root_item, 0);
+	btrfs_set_root_refs(&root_item, 0);
+	btrfs_set_root_used(&root_item, 0);
+
+	memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
+	root_item.drop_level = 0;
+
+	btrfs_tree_unlock(leaf);
+	free_extent_buffer(leaf);
+	leaf = NULL;
+
+	btrfs_set_root_dirid(&root_item, 0);
+
+	key.objectid = BTRFS_TREE_LOG_OBJECTID;
+	key.offset = objectid;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key,
+				&root_item);
+	if (ret)
+		goto fail;
+
+	new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree,
+					       &key);
+	BUG_ON(!new_root);
+
+	WARN_ON(root->log_root);
+	root->log_root = new_root;
+
+	/*
+	 * log trees do not get reference counted because they go away
+	 * before a real commit is actually done.  They do store pointers
+	 * to file data extents, and those reference counts still get
+	 * updated (along with back refs to the log tree).
+	 */
+	new_root->ref_cows = 0;
+	new_root->last_trans = trans->transid;
+fail:
+	return ret;
+}
+
+/*
+ * start a sub transaction and set up the log tree.
+ * This increments the log tree writer count to make the people
+ * syncing the tree wait for us to finish
+ */
+static int start_log_trans(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root)
+{
+	int ret;
+	mutex_lock(&root->fs_info->tree_log_mutex);
+	if (!root->fs_info->log_root_tree) {
+		ret = btrfs_init_log_root_tree(trans, root->fs_info);
+		BUG_ON(ret);
+	}
+	if (!root->log_root) {
+		ret = btrfs_add_log_tree(trans, root);
+		BUG_ON(ret);
+	}
+	atomic_inc(&root->fs_info->tree_log_writers);
+	root->fs_info->tree_log_batch++;
+	mutex_unlock(&root->fs_info->tree_log_mutex);
+	return 0;
+}
+
+/*
+ * returns 0 if there was a log transaction running and we were able
+ * to join, or returns -ENOENT if there were no transactions
+ * in progress
+ */
+static int join_running_log_trans(struct btrfs_root *root)
+{
+	int ret = -ENOENT;
+
+	smp_mb();
+	if (!root->log_root)
+		return -ENOENT;
+
+	mutex_lock(&root->fs_info->tree_log_mutex);
+	if (root->log_root) {
+		ret = 0;
+		atomic_inc(&root->fs_info->tree_log_writers);
+		root->fs_info->tree_log_batch++;
+	}
+	mutex_unlock(&root->fs_info->tree_log_mutex);
+	return ret;
+}
+
+/*
+ * indicate we're done making changes to the log tree
+ * and wake up anyone waiting to do a sync
+ */
+static int end_log_trans(struct btrfs_root *root)
+{
+	atomic_dec(&root->fs_info->tree_log_writers);
+	smp_mb();
+	if (waitqueue_active(&root->fs_info->tree_log_wait))
+		wake_up(&root->fs_info->tree_log_wait);
+	return 0;
+}
+
+
+/*
+ * the walk control struct is used to pass state down the chain when
+ * processing the log tree.  The stage field tells us which part
+ * of the log tree processing we are currently doing.  The others
+ * are state fields used for that specific part
+ */
+struct walk_control {
+	/* should we free the extent on disk when done?  This is used
+	 * at transaction commit time while freeing a log tree
+	 */
+	int free;
+
+	/* should we write out the extent buffer?  This is used
+	 * while flushing the log tree to disk during a sync
+	 */
+	int write;
+
+	/* should we wait for the extent buffer io to finish?  Also used
+	 * while flushing the log tree to disk for a sync
+	 */
+	int wait;
+
+	/* pin only walk, we record which extents on disk belong to the
+	 * log trees
+	 */
+	int pin;
+
+	/* what stage of the replay code we're currently in */
+	int stage;
+
+	/* the root we are currently replaying */
+	struct btrfs_root *replay_dest;
+
+	/* the trans handle for the current replay */
+	struct btrfs_trans_handle *trans;
+
+	/* the function that gets used to process blocks we find in the
+	 * tree.  Note the extent_buffer might not be up to date when it is
+	 * passed in, and it must be checked or read if you need the data
+	 * inside it
+	 */
+	int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
+			    struct walk_control *wc, u64 gen);
+};
+
+/*
+ * process_func used to pin down extents, write them or wait on them
+ */
+static int process_one_buffer(struct btrfs_root *log,
+			      struct extent_buffer *eb,
+			      struct walk_control *wc, u64 gen)
+{
+	if (wc->pin) {
+		mutex_lock(&log->fs_info->alloc_mutex);
+		btrfs_update_pinned_extents(log->fs_info->extent_root,
+					    eb->start, eb->len, 1);
+		mutex_unlock(&log->fs_info->alloc_mutex);
+	}
+
+	if (btrfs_buffer_uptodate(eb, gen)) {
+		if (wc->write)
+			btrfs_write_tree_block(eb);
+		if (wc->wait)
+			btrfs_wait_tree_block_writeback(eb);
+	}
+	return 0;
+}
+
+/*
+ * Item overwrite used by replay and tree logging.  eb, slot and key all refer
+ * to the src data we are copying out.
+ *
+ * root is the tree we are copying into, and path is a scratch
+ * path for use in this function (it should be released on entry and
+ * will be released on exit).
+ *
+ * If the key is already in the destination tree the existing item is
+ * overwritten.  If the existing item isn't big enough, it is extended.
+ * If it is too large, it is truncated.
+ *
+ * If the key isn't in the destination yet, a new item is inserted.
+ */
+static noinline int overwrite_item(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct extent_buffer *eb, int slot,
+				   struct btrfs_key *key)
+{
+	int ret;
+	u32 item_size;
+	u64 saved_i_size = 0;
+	int save_old_i_size = 0;
+	unsigned long src_ptr;
+	unsigned long dst_ptr;
+	int overwrite_root = 0;
+
+	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+		overwrite_root = 1;
+
+	item_size = btrfs_item_size_nr(eb, slot);
+	src_ptr = btrfs_item_ptr_offset(eb, slot);
+
+	/* look for the key in the destination tree */
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret == 0) {
+		char *src_copy;
+		char *dst_copy;
+		u32 dst_size = btrfs_item_size_nr(path->nodes[0],
+						  path->slots[0]);
+		if (dst_size != item_size)
+			goto insert;
+
+		if (item_size == 0) {
+			btrfs_release_path(root, path);
+			return 0;
+		}
+		dst_copy = kmalloc(item_size, GFP_NOFS);
+		src_copy = kmalloc(item_size, GFP_NOFS);
+
+		read_extent_buffer(eb, src_copy, src_ptr, item_size);
+
+		dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
+		read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
+				   item_size);
+		ret = memcmp(dst_copy, src_copy, item_size);
+
+		kfree(dst_copy);
+		kfree(src_copy);
+		/*
+		 * they have the same contents, just return, this saves
+		 * us from cowing blocks in the destination tree and doing
+		 * extra writes that may not have been done by a previous
+		 * sync
+		 */
+		if (ret == 0) {
+			btrfs_release_path(root, path);
+			return 0;
+		}
+
+	}
+insert:
+	btrfs_release_path(root, path);
+	/* try to insert the key into the destination tree */
+	ret = btrfs_insert_empty_item(trans, root, path,
+				      key, item_size);
+
+	/* make sure any existing item is the correct size */
+	if (ret == -EEXIST) {
+		u32 found_size;
+		found_size = btrfs_item_size_nr(path->nodes[0],
+						path->slots[0]);
+		if (found_size > item_size) {
+			btrfs_truncate_item(trans, root, path, item_size, 1);
+		} else if (found_size < item_size) {
+			ret = btrfs_del_item(trans, root,
+					     path);
+			BUG_ON(ret);
+
+			btrfs_release_path(root, path);
+			ret = btrfs_insert_empty_item(trans,
+				  root, path, key, item_size);
+			BUG_ON(ret);
+		}
+	} else if (ret) {
+		BUG();
+	}
+	dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
+					path->slots[0]);
+
+	/* don't overwrite an existing inode if the generation number
+	 * was logged as zero.  This is done when the tree logging code
+	 * is just logging an inode to make sure it exists after recovery.
+	 *
+	 * Also, don't overwrite i_size on directories during replay.
+	 * log replay inserts and removes directory items based on the
+	 * state of the tree found in the subvolume, and i_size is modified
+	 * as it goes
+	 */
+	if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
+		struct btrfs_inode_item *src_item;
+		struct btrfs_inode_item *dst_item;
+
+		src_item = (struct btrfs_inode_item *)src_ptr;
+		dst_item = (struct btrfs_inode_item *)dst_ptr;
+
+		if (btrfs_inode_generation(eb, src_item) == 0)
+			goto no_copy;
+
+		if (overwrite_root &&
+		    S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
+		    S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
+			save_old_i_size = 1;
+			saved_i_size = btrfs_inode_size(path->nodes[0],
+							dst_item);
+		}
+	}
+
+	copy_extent_buffer(path->nodes[0], eb, dst_ptr,
+			   src_ptr, item_size);
+
+	if (save_old_i_size) {
+		struct btrfs_inode_item *dst_item;
+		dst_item = (struct btrfs_inode_item *)dst_ptr;
+		btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
+	}
+
+	/* make sure the generation is filled in */
+	if (key->type == BTRFS_INODE_ITEM_KEY) {
+		struct btrfs_inode_item *dst_item;
+		dst_item = (struct btrfs_inode_item *)dst_ptr;
+		if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
+			btrfs_set_inode_generation(path->nodes[0], dst_item,
+						   trans->transid);
+		}
+	}
+
+	if (overwrite_root &&
+	    key->type == BTRFS_EXTENT_DATA_KEY) {
+		int extent_type;
+		struct btrfs_file_extent_item *fi;
+
+		fi = (struct btrfs_file_extent_item *)dst_ptr;
+		extent_type = btrfs_file_extent_type(path->nodes[0], fi);
+		if (extent_type == BTRFS_FILE_EXTENT_REG) {
+			struct btrfs_key ins;
+			ins.objectid = btrfs_file_extent_disk_bytenr(
+							path->nodes[0], fi);
+			ins.offset = btrfs_file_extent_disk_num_bytes(
+							path->nodes[0], fi);
+			ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+			/*
+			 * is this extent already allocated in the extent
+			 * allocation tree?  If so, just add a reference
+			 */
+			ret = btrfs_lookup_extent(root, ins.objectid,
+						  ins.offset);
+			if (ret == 0) {
+				ret = btrfs_inc_extent_ref(trans, root,
+						ins.objectid, ins.offset,
+						path->nodes[0]->start,
+						root->root_key.objectid,
+						trans->transid, key->objectid);
+			} else {
+				/*
+				 * insert the extent pointer in the extent
+				 * allocation tree
+				 */
+				ret = btrfs_alloc_logged_extent(trans, root,
+						path->nodes[0]->start,
+						root->root_key.objectid,
+						trans->transid, key->objectid,
+						&ins);
+				BUG_ON(ret);
+			}
+		}
+	}
+no_copy:
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	btrfs_release_path(root, path);
+	return 0;
+}
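overwrite_item() is essentially an upsert with size reconciliation: identical bytes are skipped to avoid needless COW, a differently sized existing item is truncated or reinserted, and only then are the new bytes copied in. A tiny standalone analogue over a heap buffer (hypothetical struct, not the btrfs item code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct item {
	int key;
	size_t size;
	char *data;
};

/* overwrite-or-insert: reuse the slot, resizing its payload if needed */
static void overwrite_item(struct item *it, int key,
			   const void *data, size_t size)
{
	if (it->data && it->size == size &&
	    memcmp(it->data, data, size) == 0)
		return;			/* identical contents: skip the write */

	if (it->size != size) {		/* grow or shrink the existing item */
		it->data = realloc(it->data, size);
		it->size = size;
	}
	it->key = key;
	memcpy(it->data, data, size);
}

int main(void)
{
	struct item it = { 0 };

	overwrite_item(&it, 7, "hello", 6);
	overwrite_item(&it, 7, "hello", 6);	/* no-op, same bytes   */
	overwrite_item(&it, 7, "hi", 3);	/* shrinks the payload */

	printf("key %d holds \"%s\" (%zu bytes)\n", it.key, it.data, it.size);
	free(it.data);
	return 0;
}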
+
+/*
+ * simple helper to read an inode off the disk from a given root
+ * This can only be called for subvolume roots and not for the log
+ */
+static noinline struct inode *read_one_inode(struct btrfs_root *root,
+					     u64 objectid)
+{
+	struct inode *inode;
+	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
+	if (inode->i_state & I_NEW) {
+		BTRFS_I(inode)->root = root;
+		BTRFS_I(inode)->location.objectid = objectid;
+		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+		BTRFS_I(inode)->location.offset = 0;
+		btrfs_read_locked_inode(inode);
+		unlock_new_inode(inode);
+
+	}
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		inode = NULL;
+	}
+	return inode;
+}
+
+/* replays a single extent in 'eb' at 'slot' with 'key' into the
+ * subvolume 'root'.  path is released on entry and should be released
+ * on exit.
+ *
+ * extents in the log tree have not been allocated out of the extent
+ * tree yet.  So, this completes the allocation, taking a reference
+ * as required if the extent already exists or creating a new extent
+ * if it isn't in the extent allocation tree yet.
+ *
+ * The extent is inserted into the file, dropping any existing extents
+ * from the file that overlap the new one.
+ */
+static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct extent_buffer *eb, int slot,
+				      struct btrfs_key *key)
+{
+	int found_type;
+	u64 mask = root->sectorsize - 1;
+	u64 extent_end;
+	u64 alloc_hint;
+	u64 start = key->offset;
+	struct btrfs_file_extent_item *item;
+	struct inode *inode = NULL;
+	unsigned long size;
+	int ret = 0;
+
+	item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+	found_type = btrfs_file_extent_type(eb, item);
+
+	if (found_type == BTRFS_FILE_EXTENT_REG)
+		extent_end = start + btrfs_file_extent_num_bytes(eb, item);
+	else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+		size = btrfs_file_extent_inline_len(eb,
+						    btrfs_item_nr(eb, slot));
+		extent_end = (start + size + mask) & ~mask;
+	} else {
+		ret = 0;
+		goto out;
+	}
+
+	inode = read_one_inode(root, key->objectid);
+	if (!inode) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * first check to see if we already have this extent in the
+	 * file.  This must be done before the btrfs_drop_extents run
+	 * so we don't try to drop this extent.
+	 */
+	ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+				       start, 0);
+
+	if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) {
+		struct btrfs_file_extent_item cmp1;
+		struct btrfs_file_extent_item cmp2;
+		struct btrfs_file_extent_item *existing;
+		struct extent_buffer *leaf;
+
+		leaf = path->nodes[0];
+		existing = btrfs_item_ptr(leaf, path->slots[0],
+					  struct btrfs_file_extent_item);
+
+		read_extent_buffer(eb, &cmp1, (unsigned long)item,
+				   sizeof(cmp1));
+		read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
+				   sizeof(cmp2));
+
+		/*
+		 * we already have a pointer to this exact extent,
+		 * we don't have to do anything
+		 */
+		if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
+			btrfs_release_path(root, path);
+			goto out;
+		}
+	}
+	btrfs_release_path(root, path);
+
+	/* drop any overlapping extents */
+	ret = btrfs_drop_extents(trans, root, inode,
+			 start, extent_end, start, &alloc_hint);
+	BUG_ON(ret);
+
+	/* insert the extent */
+	ret = overwrite_item(trans, root, path, eb, slot, key);
+	BUG_ON(ret);
+
+	/* btrfs_drop_extents changes i_bytes & i_blocks, update it here */
+	inode_add_bytes(inode, extent_end - start);
+	btrfs_update_inode(trans, root, inode);
+out:
+	if (inode)
+		iput(inode);
+	return ret;
+}
+
+/*
+ * when cleaning up conflicts between the directory names in the
+ * subvolume, directory names in the log and directory names in the
+ * inode back references, we may have to unlink inodes from directories.
+ *
+ * This is a helper function to do the unlink of a specific directory
+ * item
+ */
+static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct inode *dir,
+				      struct btrfs_dir_item *di)
+{
+	struct inode *inode;
+	char *name;
+	int name_len;
+	struct extent_buffer *leaf;
+	struct btrfs_key location;
+	int ret;
+
+	leaf = path->nodes[0];
+
+	btrfs_dir_item_key_to_cpu(leaf, di, &location);
+	name_len = btrfs_dir_name_len(leaf, di);
+	name = kmalloc(name_len, GFP_NOFS);
+	read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
+	btrfs_release_path(root, path);
+
+	inode = read_one_inode(root, location.objectid);
+	BUG_ON(!inode);
+
+	btrfs_inc_nlink(inode);
+	ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+	kfree(name);
+
+	iput(inode);
+	return ret;
+}
+
+/*
+ * helper function to see if a given name and sequence number found
+ * in an inode back reference are already in a directory and correctly
+ * point to this inode
+ */
+static noinline int inode_in_dir(struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 dirid, u64 objectid, u64 index,
+				 const char *name, int name_len)
+{
+	struct btrfs_dir_item *di;
+	struct btrfs_key location;
+	int match = 0;
+
+	di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
+					 index, name, name_len, 0);
+	if (di && !IS_ERR(di)) {
+		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
+		if (location.objectid != objectid)
+			goto out;
+	} else
+		goto out;
+	btrfs_release_path(root, path);
+
+	di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
+	if (di && !IS_ERR(di)) {
+		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
+		if (location.objectid != objectid)
+			goto out;
+	} else
+		goto out;
+	match = 1;
+out:
+	btrfs_release_path(root, path);
+	return match;
+}
+
+/*
+ * helper function to check a log tree for a named back reference in
+ * an inode.  This is used to decide if a back reference that is
+ * found in the subvolume conflicts with what we find in the log.
+ *
+ * inode back references may have multiple refs in a single item;
+ * during replay we process one reference at a time, and we don't
+ * want to delete valid links to a file from the subvolume if that
+ * link is also in the log.
+ */
+static noinline int backref_in_log(struct btrfs_root *log,
+				   struct btrfs_key *key,
+				   char *name, int namelen)
+{
+	struct btrfs_path *path;
+	struct btrfs_inode_ref *ref;
+	unsigned long ptr;
+	unsigned long ptr_end;
+	unsigned long name_ptr;
+	int found_name_len;
+	int item_size;
+	int ret;
+	int match = 0;
+
+	path = btrfs_alloc_path();
+	ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
+	if (ret != 0)
+		goto out;
+
+	item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
+	ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
+	ptr_end = ptr + item_size;
+	while (ptr < ptr_end) {
+		ref = (struct btrfs_inode_ref *)ptr;
+		found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
+		if (found_name_len == namelen) {
+			name_ptr = (unsigned long)(ref + 1);
+			ret = memcmp_extent_buffer(path->nodes[0], name,
+						   name_ptr, namelen);
+			if (ret == 0) {
+				match = 1;
+				goto out;
+			}
+		}
+		ptr = (unsigned long)(ref + 1) + found_name_len;
+	}
+out:
+	btrfs_free_path(path);
+	return match;
+}
+
+
+/*
+ * replay one inode back reference item found in the log tree.
+ * eb, slot and key refer to the buffer and key found in the log tree.
+ * root is the destination we are replaying into, and path is for temp
+ * use by this function.  (it should be released on return).
+ */
+static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_root *log,
+				  struct btrfs_path *path,
+				  struct extent_buffer *eb, int slot,
+				  struct btrfs_key *key)
+{
+	struct inode *dir;
+	int ret;
+	struct btrfs_key location;
+	struct btrfs_inode_ref *ref;
+	struct btrfs_dir_item *di;
+	struct inode *inode;
+	char *name;
+	int namelen;
+	unsigned long ref_ptr;
+	unsigned long ref_end;
+
+	location.objectid = key->objectid;
+	location.type = BTRFS_INODE_ITEM_KEY;
+	location.offset = 0;
+
+	/*
+	 * it is possible that we didn't log all the parent directories
+	 * for a given inode.  If we don't find the dir, just don't
+	 * copy the back ref in.  The link count fixup code will take
+	 * care of the rest
+	 */
+	dir = read_one_inode(root, key->offset);
+	if (!dir)
+		return -ENOENT;
+
+	inode = read_one_inode(root, key->objectid);
+	BUG_ON(!inode);
+
+	ref_ptr = btrfs_item_ptr_offset(eb, slot);
+	ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
+
+again:
+	ref = (struct btrfs_inode_ref *)ref_ptr;
+
+	namelen = btrfs_inode_ref_name_len(eb, ref);
+	name = kmalloc(namelen, GFP_NOFS);
+	BUG_ON(!name);
+
+	read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
+
+	/* if we already have a perfect match, we're done */
+	if (inode_in_dir(root, path, dir->i_ino, inode->i_ino,
+			 btrfs_inode_ref_index(eb, ref),
+			 name, namelen)) {
+		goto out;
+	}
+
+	/*
+	 * look for a conflicting back reference in the metadata.
+	 * if we find one we have to unlink that name of the file
+	 * before we add our new link.  Later on, we overwrite any
+	 * existing back reference, and we don't want to create
+	 * dangling pointers in the directory.
+	 */
+conflict_again:
+	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	if (ret == 0) {
+		char *victim_name;
+		int victim_name_len;
+		struct btrfs_inode_ref *victim_ref;
+		unsigned long ptr;
+		unsigned long ptr_end;
+		struct extent_buffer *leaf = path->nodes[0];
+
+		/* are we trying to overwrite a back ref for the root directory
+		 * if so, just jump out, we're done
+		 */
+		if (key->objectid == key->offset)
+			goto out_nowrite;
+
+		/* check all the names in this back reference to see
+		 * if they are in the log.  if so, we allow them to stay
+		 * otherwise they must be unlinked as a conflict
+		 */
+		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
+		while(ptr < ptr_end) {
+			victim_ref = (struct btrfs_inode_ref *)ptr;
+			victim_name_len = btrfs_inode_ref_name_len(leaf,
+								   victim_ref);
+			victim_name = kmalloc(victim_name_len, GFP_NOFS);
+			BUG_ON(!victim_name);
+
+			read_extent_buffer(leaf, victim_name,
+					   (unsigned long)(victim_ref + 1),
+					   victim_name_len);
+
+			if (!backref_in_log(log, key, victim_name,
+					    victim_name_len)) {
+				btrfs_inc_nlink(inode);
+				btrfs_release_path(root, path);
+				ret = btrfs_unlink_inode(trans, root, dir,
+							 inode, victim_name,
+							 victim_name_len);
+				kfree(victim_name);
+				btrfs_release_path(root, path);
+				goto conflict_again;
+			}
+			kfree(victim_name);
+			ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
+		}
+		BUG_ON(ret);
+	}
+	btrfs_release_path(root, path);
+
+	/* look for a conflicting sequence number */
+	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+					 btrfs_inode_ref_index(eb, ref),
+					 name, namelen, 0);
+	if (di && !IS_ERR(di)) {
+		ret = drop_one_dir_item(trans, root, path, dir, di);
+		BUG_ON(ret);
+	}
+	btrfs_release_path(root, path);
+
+
+	/* look for a conflicting name */
+	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+				   name, namelen, 0);
+	if (di && !IS_ERR(di)) {
+		ret = drop_one_dir_item(trans, root, path, dir, di);
+		BUG_ON(ret);
+	}
+	btrfs_release_path(root, path);
+
+	/* insert our name */
+	ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
+			     btrfs_inode_ref_index(eb, ref));
+	BUG_ON(ret);
+
+	btrfs_update_inode(trans, root, inode);
+
+out:
+	ref_ptr = (unsigned long)(ref + 1) + namelen;
+	kfree(name);
+	if (ref_ptr < ref_end)
+		goto again;
+
+	/* finally write the back reference in the inode */
+	ret = overwrite_item(trans, root, path, eb, slot, key);
+	BUG_ON(ret);
+
+out_nowrite:
+	btrfs_release_path(root, path);
+	iput(dir);
+	iput(inode);
+	return 0;
+}
+
+/*
+ * replay one csum item from the log tree into the subvolume 'root'
+ * eb, slot and key all refer to the log tree
+ * path is for temp use by this function and should be released on return
+ *
+ * This copies the checksums out of the log tree and inserts them into
+ * the subvolume.  Any existing checksums for this range in the file
+ * are overwritten, and new items are added where required.
+ *
+ * We keep this simple by reusing the btrfs_ordered_sum code from
+ * the data=ordered mode.  This basically means making a copy
+ * of all the checksums in ram, which we have to do anyway for kmap
+ * rules.
+ *
+ * The copy is then sent down to btrfs_csum_file_blocks, which
+ * does all the hard work of finding existing items in the file
+ * or adding new ones.
+ */
+static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      struct extent_buffer *eb, int slot,
+				      struct btrfs_key *key)
+{
+	int ret;
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	u64 cur_offset;
+	unsigned long file_bytes;
+	struct btrfs_ordered_sum *sums;
+	struct btrfs_sector_sum *sector_sum;
+	struct inode *inode;
+	unsigned long ptr;
+
+	file_bytes = (item_size / BTRFS_CRC32_SIZE) * root->sectorsize;
+	inode = read_one_inode(root, key->objectid);
+	if (!inode) {
+		return -EIO;
+	}
+
+	sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
+	if (!sums) {
+		iput(inode);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&sums->list);
+	sums->len = file_bytes;
+	sums->file_offset = key->offset;
+
+	/*
+	 * copy all the sums into the ordered sum struct
+	 */
+	sector_sum = sums->sums;
+	cur_offset = key->offset;
+	ptr = btrfs_item_ptr_offset(eb, slot);
+	while(item_size > 0) {
+		sector_sum->offset = cur_offset;
+		read_extent_buffer(eb, &sector_sum->sum, ptr, BTRFS_CRC32_SIZE);
+		sector_sum++;
+		item_size -= BTRFS_CRC32_SIZE;
+		ptr += BTRFS_CRC32_SIZE;
+		cur_offset += root->sectorsize;
+	}
+
+	/* let btrfs_csum_file_blocks add them into the file */
+	ret = btrfs_csum_file_blocks(trans, root, inode, sums);
+	BUG_ON(ret);
+	kfree(sums);
+	iput(inode);
+
+	return 0;
+}
+
+/*
+ * There are a few corners where the link count of the file can't
+ * be properly maintained during replay.  So, instead of adding
+ * lots of complexity to the log code, we just scan the backrefs
+ * for any file that has been through replay.
+ *
+ * The scan will update the link count on the inode to reflect the
+ * number of back refs found.  If it goes down to zero, the iput
+ * will free the inode.
+ */
+static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct inode *inode)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_key key;
+	u64 nlink = 0;
+	unsigned long ptr;
+	unsigned long ptr_end;
+	int name_len;
+
+	key.objectid = inode->i_ino;
+	key.type = BTRFS_INODE_REF_KEY;
+	key.offset = (u64)-1;
+
+	path = btrfs_alloc_path();
+
+	while(1) {
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			break;
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &key,
+				      path->slots[0]);
+		if (key.objectid != inode->i_ino ||
+		    key.type != BTRFS_INODE_REF_KEY)
+			break;
+		ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
+		ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
+						   path->slots[0]);
+		while(ptr < ptr_end) {
+			struct btrfs_inode_ref *ref;
+
+			ref = (struct btrfs_inode_ref *)ptr;
+			name_len = btrfs_inode_ref_name_len(path->nodes[0],
+							    ref);
+			ptr = (unsigned long)(ref + 1) + name_len;
+			nlink++;
+		}
+
+		if (key.offset == 0)
+			break;
+		key.offset--;
+		btrfs_release_path(root, path);
+	}
+	btrfs_free_path(path);
+	if (nlink != inode->i_nlink) {
+		inode->i_nlink = nlink;
+		btrfs_update_inode(trans, root, inode);
+	}
+	BTRFS_I(inode)->index_cnt = (u64)-1;
+
+	return 0;
+}
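fixup_inode_link_count() recomputes i_nlink by walking every BTRFS_INODE_REF_KEY item belonging to the inode and counting the names packed into each item. A minimal standalone version of that counting over an in-memory array of back references (made-up types, same idea):

#include <stdio.h>

struct backref {
	unsigned long inode;		/* which inode this name points at */
	unsigned long dir;		/* directory holding the name      */
	const char *name;
};

/* count how many directory entries still reference the inode */
static unsigned int count_links(const struct backref *refs, int nr,
				unsigned long inode)
{
	unsigned int nlink = 0;

	for (int i = 0; i < nr; i++) {
		if (refs[i].inode == inode)
			nlink++;
	}
	return nlink;
}

int main(void)
{
	const struct backref refs[] = {
		{ 257, 256, "a" },
		{ 257, 256, "hardlink-to-a" },
		{ 258, 256, "b" },
	};
	unsigned long inode = 257;

	/* after replay, the stored nlink is reset to what the refs say */
	printf("inode %lu has %u links\n", inode,
	       count_links(refs, 3, inode));
	return 0;
}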
+
+static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
+					    struct btrfs_root *root,
+					    struct btrfs_path *path)
+{
+	int ret;
+	struct btrfs_key key;
+	struct inode *inode;
+
+	key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
+	key.type = BTRFS_ORPHAN_ITEM_KEY;
+	key.offset = (u64)-1;
+	while(1) {
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret < 0)
+			break;
+
+		if (ret == 1) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
+		    key.type != BTRFS_ORPHAN_ITEM_KEY)
+			break;
+
+		ret = btrfs_del_item(trans, root, path);
+		BUG_ON(ret);
+
+		btrfs_release_path(root, path);
+		inode = read_one_inode(root, key.offset);
+		BUG_ON(!inode);
+
+		ret = fixup_inode_link_count(trans, root, inode);
+		BUG_ON(ret);
+
+		iput(inode);
+
+		if (key.offset == 0)
+			break;
+		key.offset--;
+	}
+	btrfs_release_path(root, path);
+	return 0;
+}
+
+
+/*
+ * record a given inode in the fixup dir so we can check its link
+ * count when replay is done.  The link count is incremented here
+ * so the inode won't go away until we check it
+ */
+static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      u64 objectid)
+{
+	struct btrfs_key key;
+	int ret = 0;
+	struct inode *inode;
+
+	inode = read_one_inode(root, objectid);
+	BUG_ON(!inode);
+
+	key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
+	btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
+	key.offset = objectid;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+
+	btrfs_release_path(root, path);
+	if (ret == 0) {
+		btrfs_inc_nlink(inode);
+		btrfs_update_inode(trans, root, inode);
+	} else if (ret == -EEXIST) {
+		ret = 0;
+	} else {
+		BUG();
+	}
+	iput(inode);
+
+	return ret;
+}
+
+/*
+ * when replaying the log for a directory, we only insert names
+ * for inodes that actually exist.  This means an fsync on a directory
+ * does not implicitly fsync all the new files in it
+ */
+static noinline int insert_one_name(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root,
+				    struct btrfs_path *path,
+				    u64 dirid, u64 index,
+				    char *name, int name_len, u8 type,
+				    struct btrfs_key *location)
+{
+	struct inode *inode;
+	struct inode *dir;
+	int ret;
+
+	inode = read_one_inode(root, location->objectid);
+	if (!inode)
+		return -ENOENT;
+
+	dir = read_one_inode(root, dirid);
+	if (!dir) {
+		iput(inode);
+		return -EIO;
+	}
+	ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
+
+	/* FIXME, put inode into FIXUP list */
+
+	iput(inode);
+	iput(dir);
+	return ret;
+}
+
+/*
+ * take a single entry in a log directory item and replay it into
+ * the subvolume.
+ *
+ * if a conflicting item exists in the subdirectory already,
+ * the inode it points to is unlinked and put into the link count
+ * fix up tree.
+ *
+ * If a name from the log points to a file or directory that does
+ * not exist in the FS, it is skipped.  fsyncs on directories
+ * do not force down inodes inside that directory, just changes to the
+ * names or unlinks in a directory.
+ */
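+/*
+ * A concrete example of the rules above (inode numbers are hypothetical):
+ * suppose the subvolume has 'foo' pointing at inode 258 while the log says
+ * 'foo' should point at inode 260.  If inode 260 exists, the old entry is
+ * dropped with drop_one_dir_item() (sending inode 258 through the fixup
+ * tree so its link count gets rechecked), and the name is re-inserted when
+ * the BTRFS_DIR_INDEX_KEY copy of the entry is replayed.  If inode 260
+ * does not exist, the conflicting entry is left alone and the logged name
+ * is skipped.
+ */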
+static noinline int replay_one_name(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root,
+				    struct btrfs_path *path,
+				    struct extent_buffer *eb,
+				    struct btrfs_dir_item *di,
+				    struct btrfs_key *key)
+{
+	char *name;
+	int name_len;
+	struct btrfs_dir_item *dst_di;
+	struct btrfs_key found_key;
+	struct btrfs_key log_key;
+	struct inode *dir;
+	u8 log_type;
+	int exists;
+	int ret;
+
+	dir = read_one_inode(root, key->objectid);
+	BUG_ON(!dir);
+
+	name_len = btrfs_dir_name_len(eb, di);
+	name = kmalloc(name_len, GFP_NOFS);
+	log_type = btrfs_dir_type(eb, di);
+	read_extent_buffer(eb, name, (unsigned long)(di + 1),
+		   name_len);
+
+	btrfs_dir_item_key_to_cpu(eb, di, &log_key);
+	exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
+	if (exists == 0)
+		exists = 1;
+	else
+		exists = 0;
+	btrfs_release_path(root, path);
+
+	if (key->type == BTRFS_DIR_ITEM_KEY) {
+		dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
+				       name, name_len, 1);
+	}
+	else if (key->type == BTRFS_DIR_INDEX_KEY) {
+		dst_di = btrfs_lookup_dir_index_item(trans, root, path,
+						     key->objectid,
+						     key->offset, name,
+						     name_len, 1);
+	} else {
+		BUG();
+	}
+	if (!dst_di || IS_ERR(dst_di)) {
+		/* we need a sequence number to insert, so we only
+		 * do inserts for the BTRFS_DIR_INDEX_KEY types
+		 */
+		if (key->type != BTRFS_DIR_INDEX_KEY)
+			goto out;
+		goto insert;
+	}
+
+	btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
+	/* the existing item matches the logged item */
+	if (found_key.objectid == log_key.objectid &&
+	    found_key.type == log_key.type &&
+	    found_key.offset == log_key.offset &&
+	    btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
+		goto out;
+	}
+
+	/*
+	 * don't drop the conflicting directory entry if the inode
+	 * for the new entry doesn't exist
+	 */
+	if (!exists)
+		goto out;
+
+	ret = drop_one_dir_item(trans, root, path, dir, dst_di);
+	BUG_ON(ret);
+
+	if (key->type == BTRFS_DIR_INDEX_KEY)
+		goto insert;
+out:
+	btrfs_release_path(root, path);
+	kfree(name);
+	iput(dir);
+	return 0;
+
+insert:
+	btrfs_release_path(root, path);
+	ret = insert_one_name(trans, root, path, key->objectid, key->offset,
+			      name, name_len, log_type, &log_key);
+
+	if (ret && ret != -ENOENT)
+		BUG();
+	goto out;
+}
+
+/*
+ * find all the names in a directory item and reconcile them into
+ * the subvolume.  Only BTRFS_DIR_ITEM_KEY types will have more than
+ * one name in a directory item, but the same code gets used for
+ * both directory index types
+ */
+static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					struct extent_buffer *eb, int slot,
+					struct btrfs_key *key)
+{
+	int ret;
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	struct btrfs_dir_item *di;
+	int name_len;
+	unsigned long ptr;
+	unsigned long ptr_end;
+
+	ptr = btrfs_item_ptr_offset(eb, slot);
+	ptr_end = ptr + item_size;
+	while(ptr < ptr_end) {
+		di = (struct btrfs_dir_item *)ptr;
+		name_len = btrfs_dir_name_len(eb, di);
+		ret = replay_one_name(trans, root, path, eb, di, key);
+		BUG_ON(ret);
+		ptr = (unsigned long)(di + 1);
+		ptr += name_len;
+	}
+	return 0;
+}
+
+/*
+ * directory replay has two parts.  There are the standard directory
+ * items in the log copied from the subvolume, and range items
+ * created in the log while the subvolume was logged.
+ *
+ * The range items tell us which parts of the key space the log
+ * is authoritative for.  During replay, if a key in the subvolume
+ * directory is in a logged range item, but not actually in the log,
+ * that means it was deleted from the directory before the fsync
+ * and should be removed.
+ */
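+/*
+ * For illustration (values are hypothetical): an fsync that logged
+ * directory index keys 2 through 5 also writes a BTRFS_DIR_LOG_INDEX_KEY
+ * item with offset 2 and dir_log_end 5.  During replay, find_dir_range()
+ * hands back [2, 5], and any DIR_INDEX key in that range found in the
+ * subvolume but missing from the log tree is unlinked by
+ * check_item_in_log().
+ */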
+static noinline int find_dir_range(struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   u64 dirid, int key_type,
+				   u64 *start_ret, u64 *end_ret)
+{
+	struct btrfs_key key;
+	u64 found_end;
+	struct btrfs_dir_log_item *item;
+	int ret;
+	int nritems;
+
+	if (*start_ret == (u64)-1)
+		return 1;
+
+	key.objectid = dirid;
+	key.type = key_type;
+	key.offset = *start_ret;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		if (path->slots[0] == 0)
+			goto out;
+		path->slots[0]--;
+	}
+	if (ret != 0)
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+	if (key.type != key_type || key.objectid != dirid) {
+		ret = 1;
+		goto next;
+	}
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_dir_log_item);
+	found_end = btrfs_dir_log_end(path->nodes[0], item);
+
+	if (*start_ret >= key.offset && *start_ret <= found_end) {
+		ret = 0;
+		*start_ret = key.offset;
+		*end_ret = found_end;
+		goto out;
+	}
+	ret = 1;
+next:
+	/* check the next slot in the tree to see if it is a valid item */
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	if (path->slots[0] >= nritems) {
+		ret = btrfs_next_leaf(root, path);
+		if (ret)
+			goto out;
+	} else {
+		path->slots[0]++;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+	if (key.type != key_type || key.objectid != dirid) {
+		ret = 1;
+		goto out;
+	}
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_dir_log_item);
+	found_end = btrfs_dir_log_end(path->nodes[0], item);
+	*start_ret = key.offset;
+	*end_ret = found_end;
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+/*
+ * this looks for a given directory item in the log.  If the directory
+ * item is not in the log, the item is removed and the inode it points
+ * to is unlinked
+ */
+static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_root *log,
+				      struct btrfs_path *path,
+				      struct btrfs_path *log_path,
+				      struct inode *dir,
+				      struct btrfs_key *dir_key)
+{
+	int ret;
+	struct extent_buffer *eb;
+	int slot;
+	u32 item_size;
+	struct btrfs_dir_item *di;
+	struct btrfs_dir_item *log_di;
+	int name_len;
+	unsigned long ptr;
+	unsigned long ptr_end;
+	char *name;
+	struct inode *inode;
+	struct btrfs_key location;
+
+again:
+	eb = path->nodes[0];
+	slot = path->slots[0];
+	item_size = btrfs_item_size_nr(eb, slot);
+	ptr = btrfs_item_ptr_offset(eb, slot);
+	ptr_end = ptr + item_size;
+	while(ptr < ptr_end) {
+		di = (struct btrfs_dir_item *)ptr;
+		name_len = btrfs_dir_name_len(eb, di);
+		name = kmalloc(name_len, GFP_NOFS);
+		if (!name) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		read_extent_buffer(eb, name, (unsigned long)(di + 1),
+				  name_len);
+		log_di = NULL;
+		if (dir_key->type == BTRFS_DIR_ITEM_KEY) {
+			log_di = btrfs_lookup_dir_item(trans, log, log_path,
+						       dir_key->objectid,
+						       name, name_len, 0);
+		} else if (dir_key->type == BTRFS_DIR_INDEX_KEY) {
+			log_di = btrfs_lookup_dir_index_item(trans, log,
+						     log_path,
+						     dir_key->objectid,
+						     dir_key->offset,
+						     name, name_len, 0);
+		}
+		if (!log_di || IS_ERR(log_di)) {
+			btrfs_dir_item_key_to_cpu(eb, di, &location);
+			btrfs_release_path(root, path);
+			btrfs_release_path(log, log_path);
+			inode = read_one_inode(root, location.objectid);
+			BUG_ON(!inode);
+
+			ret = link_to_fixup_dir(trans, root,
+						path, location.objectid);
+			BUG_ON(ret);
+			btrfs_inc_nlink(inode);
+			ret = btrfs_unlink_inode(trans, root, dir, inode,
+						 name, name_len);
+			BUG_ON(ret);
+			kfree(name);
+			iput(inode);
+
+			/* there might still be more names under this key;
+			 * check and repeat if required
+			 */
+			ret = btrfs_search_slot(NULL, root, dir_key, path,
+						0, 0);
+			if (ret == 0)
+				goto again;
+			ret = 0;
+			goto out;
+		}
+		btrfs_release_path(log, log_path);
+		kfree(name);
+
+		ptr = (unsigned long)(di + 1);
+		ptr += name_len;
+	}
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	btrfs_release_path(log, log_path);
+	return ret;
+}
+
+/*
+ * deletion replay happens before we copy any new directory items
+ * out of the log or out of backreferences from inodes.  It
+ * scans the log to find ranges of keys that the log is authoritative for,
+ * and then scans the directory to find items in those ranges that are
+ * not present in the log.
+ *
+ * Anything we don't find in the log is unlinked and removed from the
+ * directory.
+ */
+static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root,
+				       struct btrfs_root *log,
+				       struct btrfs_path *path,
+				       u64 dirid)
+{
+	u64 range_start;
+	u64 range_end;
+	int key_type = BTRFS_DIR_LOG_ITEM_KEY;
+	int ret = 0;
+	struct btrfs_key dir_key;
+	struct btrfs_key found_key;
+	struct btrfs_path *log_path;
+	struct inode *dir;
+
+	dir_key.objectid = dirid;
+	dir_key.type = BTRFS_DIR_ITEM_KEY;
+	log_path = btrfs_alloc_path();
+	if (!log_path)
+		return -ENOMEM;
+
+	dir = read_one_inode(root, dirid);
+	/* it isn't an error if the inode isn't there, that can happen
+	 * because we replay the deletes before we copy in the inode item
+	 * from the log
+	 */
+	if (!dir) {
+		btrfs_free_path(log_path);
+		return 0;
+	}
+again:
+	range_start = 0;
+	range_end = 0;
+	while(1) {
+		ret = find_dir_range(log, path, dirid, key_type,
+				     &range_start, &range_end);
+		if (ret != 0)
+			break;
+
+		dir_key.offset = range_start;
+		while(1) {
+			int nritems;
+			ret = btrfs_search_slot(NULL, root, &dir_key, path,
+						0, 0);
+			if (ret < 0)
+				goto out;
+
+			nritems = btrfs_header_nritems(path->nodes[0]);
+			if (path->slots[0] >= nritems) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret)
+					break;
+			}
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+					      path->slots[0]);
+			if (found_key.objectid != dirid ||
+			    found_key.type != dir_key.type)
+				goto next_type;
+
+			if (found_key.offset > range_end)
+				break;
+
+			ret = check_item_in_log(trans, root, log, path,
+						log_path, dir, &found_key);
+			BUG_ON(ret);
+			if (found_key.offset == (u64)-1)
+				break;
+			dir_key.offset = found_key.offset + 1;
+		}
+		btrfs_release_path(root, path);
+		if (range_end == (u64)-1)
+			break;
+		range_start = range_end + 1;
+	}
+
+next_type:
+	ret = 0;
+	if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
+		key_type = BTRFS_DIR_LOG_INDEX_KEY;
+		dir_key.type = BTRFS_DIR_INDEX_KEY;
+		btrfs_release_path(root, path);
+		goto again;
+	}
+out:
+	btrfs_release_path(root, path);
+	btrfs_free_path(log_path);
+	iput(dir);
+	return ret;
+}
+
+/*
+ * the process_func used to replay items from the log tree.  This
+ * gets called in two different stages.  The first stage just looks
+ * for inodes and makes sure they are all copied into the subvolume.
+ *
+ * The second stage copies all the other item types from the log into
+ * the subvolume.  The two stage approach is slower, but gets rid of
+ * lots of complexity around inodes referencing other inodes that exist
+ * only in the log (references come from either directory items or inode
+ * back refs).
+ */
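+/*
+ * A sketch of the two stages (inode number is hypothetical): a log holding
+ * inode 260 plus a directory entry 'foo' -> 260 has the inode item for
+ * 260 copied into the subvolume during LOG_WALK_REPLAY_INODES, and only
+ * the later LOG_WALK_REPLAY_ALL pass replays the 'foo' name.  By then
+ * read_one_inode() can find inode 260, so the link is created instead of
+ * being skipped.
+ */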
+static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+			     struct walk_control *wc, u64 gen)
+{
+	int nritems;
+	struct btrfs_path *path;
+	struct btrfs_root *root = wc->replay_dest;
+	struct btrfs_key key;
+	u32 item_size;
+	int level;
+	int i;
+	int ret;
+
+	btrfs_read_buffer(eb, gen);
+
+	level = btrfs_header_level(eb);
+
+	if (level != 0)
+		return 0;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	nritems = btrfs_header_nritems(eb);
+	for (i = 0; i < nritems; i++) {
+		btrfs_item_key_to_cpu(eb, &key, i);
+		item_size = btrfs_item_size_nr(eb, i);
+
+		/* inode keys are done during the first stage */
+		if (key.type == BTRFS_INODE_ITEM_KEY &&
+		    wc->stage == LOG_WALK_REPLAY_INODES) {
+			struct inode *inode;
+			struct btrfs_inode_item *inode_item;
+			u32 mode;
+
+			inode_item = btrfs_item_ptr(eb, i,
+					    struct btrfs_inode_item);
+			mode = btrfs_inode_mode(eb, inode_item);
+			if (S_ISDIR(mode)) {
+				ret = replay_dir_deletes(wc->trans,
+					 root, log, path, key.objectid);
+				BUG_ON(ret);
+			}
+			ret = overwrite_item(wc->trans, root, path,
+					     eb, i, &key);
+			BUG_ON(ret);
+
+			/* for regular files, truncate away
+			 * extents past the new EOF
+			 */
+			if (S_ISREG(mode)) {
+				inode = read_one_inode(root,
+						       key.objectid);
+				BUG_ON(!inode);
+
+				ret = btrfs_truncate_inode_items(wc->trans,
+					root, inode, inode->i_size,
+					BTRFS_EXTENT_DATA_KEY);
+				BUG_ON(ret);
+				iput(inode);
+			}
+			ret = link_to_fixup_dir(wc->trans, root,
+						path, key.objectid);
+			BUG_ON(ret);
+		}
+		if (wc->stage < LOG_WALK_REPLAY_ALL)
+			continue;
+
+		/* these keys are simply copied */
+		if (key.type == BTRFS_XATTR_ITEM_KEY) {
+			ret = overwrite_item(wc->trans, root, path,
+					     eb, i, &key);
+			BUG_ON(ret);
+		} else if (key.type == BTRFS_INODE_REF_KEY) {
+			ret = add_inode_ref(wc->trans, root, log, path,
+					    eb, i, &key);
+			BUG_ON(ret && ret != -ENOENT);
+		} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
+			ret = replay_one_extent(wc->trans, root, path,
+						eb, i, &key);
+			BUG_ON(ret);
+		} else if (key.type == BTRFS_CSUM_ITEM_KEY) {
+			ret = replay_one_csum(wc->trans, root, path,
+					      eb, i, &key);
+			BUG_ON(ret);
+		} else if (key.type == BTRFS_DIR_ITEM_KEY ||
+			   key.type == BTRFS_DIR_INDEX_KEY) {
+			ret = replay_one_dir_item(wc->trans, root, path,
+						  eb, i, &key);
+			BUG_ON(ret);
+		}
+	}
+	btrfs_free_path(path);
+	return 0;
+}
+
+static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path, int *level,
+				   struct walk_control *wc)
+{
+	u64 root_owner;
+	u64 root_gen;
+	u64 bytenr;
+	u64 ptr_gen;
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	struct extent_buffer *parent;
+	u32 blocksize;
+	int ret = 0;
+
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+	while(*level > 0) {
+		WARN_ON(*level < 0);
+		WARN_ON(*level >= BTRFS_MAX_LEVEL);
+		cur = path->nodes[*level];
+
+		if (btrfs_header_level(cur) != *level)
+			WARN_ON(1);
+
+		if (path->slots[*level] >=
+		    btrfs_header_nritems(cur))
+			break;
+
+		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+		blocksize = btrfs_level_size(root, *level - 1);
+
+		parent = path->nodes[*level];
+		root_owner = btrfs_header_owner(parent);
+		root_gen = btrfs_header_generation(parent);
+
+		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+
+		wc->process_func(root, next, wc, ptr_gen);
+
+		if (*level == 1) {
+			path->slots[*level]++;
+			if (wc->free) {
+				btrfs_read_buffer(next, ptr_gen);
+
+				btrfs_tree_lock(next);
+				clean_tree_block(trans, root, next);
+				btrfs_wait_tree_block_writeback(next);
+				btrfs_tree_unlock(next);
+
+				ret = btrfs_drop_leaf_ref(trans, root, next);
+				BUG_ON(ret);
+
+				WARN_ON(root_owner !=
+					BTRFS_TREE_LOG_OBJECTID);
+				ret = btrfs_free_reserved_extent(root,
+							 bytenr, blocksize);
+				BUG_ON(ret);
+			}
+			free_extent_buffer(next);
+			continue;
+		}
+		btrfs_read_buffer(next, ptr_gen);
+
+		WARN_ON(*level <= 0);
+		if (path->nodes[*level-1])
+			free_extent_buffer(path->nodes[*level-1]);
+		path->nodes[*level-1] = next;
+		*level = btrfs_header_level(next);
+		path->slots[*level] = 0;
+		cond_resched();
+	}
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+	if (path->nodes[*level] == root->node) {
+		parent = path->nodes[*level];
+	} else {
+		parent = path->nodes[*level + 1];
+	}
+	bytenr = path->nodes[*level]->start;
+
+	blocksize = btrfs_level_size(root, *level);
+	root_owner = btrfs_header_owner(parent);
+	root_gen = btrfs_header_generation(parent);
+
+	wc->process_func(root, path->nodes[*level], wc,
+			 btrfs_header_generation(path->nodes[*level]));
+
+	if (wc->free) {
+		next = path->nodes[*level];
+		btrfs_tree_lock(next);
+		clean_tree_block(trans, root, next);
+		btrfs_wait_tree_block_writeback(next);
+		btrfs_tree_unlock(next);
+
+		if (*level == 0) {
+			ret = btrfs_drop_leaf_ref(trans, root, next);
+			BUG_ON(ret);
+		}
+		WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
+		ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
+		BUG_ON(ret);
+	}
+	free_extent_buffer(path->nodes[*level]);
+	path->nodes[*level] = NULL;
+	*level += 1;
+
+	cond_resched();
+	return 0;
+}
+
+static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path, int *level,
+				 struct walk_control *wc)
+{
+	u64 root_owner;
+	u64 root_gen;
+	int i;
+	int slot;
+	int ret;
+
+	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+		slot = path->slots[i];
+		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
+			struct extent_buffer *node;
+			node = path->nodes[i];
+			path->slots[i]++;
+			*level = i;
+			WARN_ON(*level == 0);
+			return 0;
+		} else {
+			struct extent_buffer *parent;
+			if (path->nodes[*level] == root->node)
+				parent = path->nodes[*level];
+			else
+				parent = path->nodes[*level + 1];
+
+			root_owner = btrfs_header_owner(parent);
+			root_gen = btrfs_header_generation(parent);
+			wc->process_func(root, path->nodes[*level], wc,
+				 btrfs_header_generation(path->nodes[*level]));
+			if (wc->free) {
+				struct extent_buffer *next;
+
+				next = path->nodes[*level];
+
+				btrfs_tree_lock(next);
+				clean_tree_block(trans, root, next);
+				btrfs_wait_tree_block_writeback(next);
+				btrfs_tree_unlock(next);
+
+				if (*level == 0) {
+					ret = btrfs_drop_leaf_ref(trans, root,
+								  next);
+					BUG_ON(ret);
+				}
+
+				WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
+				ret = btrfs_free_reserved_extent(root,
+						path->nodes[*level]->start,
+						path->nodes[*level]->len);
+				BUG_ON(ret);
+			}
+			free_extent_buffer(path->nodes[*level]);
+			path->nodes[*level] = NULL;
+			*level = i + 1;
+		}
+	}
+	return 1;
+}
+
+/*
+ * walk the log tree rooted at 'log', calling wc->process_func on each
+ * block visited.  When wc->free is set the blocks are also cleaned and
+ * their reserved extents are released.
+ */
+static int walk_log_tree(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *log, struct walk_control *wc)
+{
+	int ret = 0;
+	int wret;
+	int level;
+	struct btrfs_path *path;
+	int i;
+	int orig_level;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	level = btrfs_header_level(log->node);
+	orig_level = level;
+	path->nodes[level] = log->node;
+	extent_buffer_get(log->node);
+	path->slots[level] = 0;
+
+	while(1) {
+		wret = walk_down_log_tree(trans, log, path, &level, wc);
+		if (wret > 0)
+			break;
+		if (wret < 0)
+			ret = wret;
+
+		wret = walk_up_log_tree(trans, log, path, &level, wc);
+		if (wret > 0)
+			break;
+		if (wret < 0)
+			ret = wret;
+	}
+
+	/* was the root node processed? if not, catch it here */
+	if (path->nodes[orig_level]) {
+		wc->process_func(log, path->nodes[orig_level], wc,
+			 btrfs_header_generation(path->nodes[orig_level]));
+		if (wc->free) {
+			struct extent_buffer *next;
+
+			next = path->nodes[orig_level];
+
+			btrfs_tree_lock(next);
+			clean_tree_block(trans, log, next);
+			btrfs_wait_tree_block_writeback(next);
+			btrfs_tree_unlock(next);
+
+			if (orig_level == 0) {
+				ret = btrfs_drop_leaf_ref(trans, log,
+							  next);
+				BUG_ON(ret);
+			}
+			WARN_ON(log->root_key.objectid !=
+				BTRFS_TREE_LOG_OBJECTID);
+			ret = btrfs_free_reserved_extent(log, next->start,
+							 next->len);
+			BUG_ON(ret);
+		}
+	}
+
+	for (i = 0; i <= orig_level; i++) {
+		if (path->nodes[i]) {
+			free_extent_buffer(path->nodes[i]);
+			path->nodes[i] = NULL;
+		}
+	}
+	btrfs_free_path(path);
+	if (wc->free)
+		free_extent_buffer(log->node);
+	return ret;
+}
+
+int wait_log_commit(struct btrfs_root *log)
+{
+	DEFINE_WAIT(wait);
+	u64 transid = log->fs_info->tree_log_transid;
+
+	do {
+		prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		mutex_unlock(&log->fs_info->tree_log_mutex);
+		if (atomic_read(&log->fs_info->tree_log_commit))
+			schedule();
+		finish_wait(&log->fs_info->tree_log_wait, &wait);
+		mutex_lock(&log->fs_info->tree_log_mutex);
+	} while(transid == log->fs_info->tree_log_transid &&
+		atomic_read(&log->fs_info->tree_log_commit));
+	return 0;
+}
+
+/*
+ * btrfs_sync_log sends a given tree log down to the disk and
+ * updates the super blocks to record it.  When this call is done,
+ * you know that any inodes previously logged are safely on disk
+ */
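+/*
+ * The commit is coordinated through fs_info: only one caller at a time
+ * sets tree_log_commit, later arrivals just wait_log_commit().  The
+ * committer loops until tree_log_writers drains and tree_log_batch stops
+ * changing, so an fsync that arrives a moment later can piggyback on the
+ * same log write instead of forcing another one.
+ */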
+int btrfs_sync_log(struct btrfs_trans_handle *trans,
+		   struct btrfs_root *root)
+{
+	int ret;
+	unsigned long batch;
+	struct btrfs_root *log = root->log_root;
+
+	mutex_lock(&log->fs_info->tree_log_mutex);
+	if (atomic_read(&log->fs_info->tree_log_commit)) {
+		wait_log_commit(log);
+		goto out;
+	}
+	atomic_set(&log->fs_info->tree_log_commit, 1);
+
+	while(1) {
+		batch = log->fs_info->tree_log_batch;
+		mutex_unlock(&log->fs_info->tree_log_mutex);
+		schedule_timeout_uninterruptible(1);
+		mutex_lock(&log->fs_info->tree_log_mutex);
+
+		while(atomic_read(&log->fs_info->tree_log_writers)) {
+			DEFINE_WAIT(wait);
+			prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
+					TASK_UNINTERRUPTIBLE);
+			mutex_unlock(&log->fs_info->tree_log_mutex);
+			if (atomic_read(&log->fs_info->tree_log_writers))
+				schedule();
+			mutex_lock(&log->fs_info->tree_log_mutex);
+			finish_wait(&log->fs_info->tree_log_wait, &wait);
+		}
+		if (batch == log->fs_info->tree_log_batch)
+			break;
+	}
+
+	ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
+	BUG_ON(ret);
+	ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree,
+			       &root->fs_info->log_root_tree->dirty_log_pages);
+	BUG_ON(ret);
+
+	btrfs_set_super_log_root(&root->fs_info->super_for_commit,
+				 log->fs_info->log_root_tree->node->start);
+	btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
+		       btrfs_header_level(log->fs_info->log_root_tree->node));
+
+	write_ctree_super(trans, log->fs_info->tree_root);
+	log->fs_info->tree_log_transid++;
+	log->fs_info->tree_log_batch = 0;
+	atomic_set(&log->fs_info->tree_log_commit, 0);
+	smp_mb();
+	if (waitqueue_active(&log->fs_info->tree_log_wait))
+		wake_up(&log->fs_info->tree_log_wait);
+out:
+	mutex_unlock(&log->fs_info->tree_log_mutex);
+	return 0;
+}
+
+/*
+ * free all the extents used by the tree log.  This should be called
+ * at commit time of the full transaction
+ */
+int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
+{
+	int ret;
+	struct btrfs_root *log;
+	u64 start;
+	u64 end;
+	struct walk_control wc = {
+		.free = 1,
+		.process_func = process_one_buffer
+	};
+
+	if (!root->log_root)
+		return 0;
+
+	log = root->log_root;
+	ret = walk_log_tree(trans, log, &wc);
+	BUG_ON(ret);
+
+	while(1) {
+		ret = find_first_extent_bit(&log->dirty_log_pages,
+				    0, &start, &end, EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_dirty(&log->dirty_log_pages,
+				   start, end, GFP_NOFS);
+	}
+
+	log = root->log_root;
+	ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
+			     &log->root_key);
+	BUG_ON(ret);
+	root->log_root = NULL;
+	kfree(log);
+	return 0;
+}
+
+/*
+ * helper function to update the item for a given subvolumes log root
+ * in the tree of log roots
+ */
+static int update_log_root(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *log)
+{
+	u64 bytenr = btrfs_root_bytenr(&log->root_item);
+	int ret;
+
+	if (log->node->start == bytenr)
+		return 0;
+
+	btrfs_set_root_bytenr(&log->root_item, log->node->start);
+	btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
+	ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
+				&log->root_key, &log->root_item);
+	BUG_ON(ret);
+	return ret;
+}
+
+/*
+ * If both a file and directory are logged, and unlinks or renames are
+ * mixed in, we have a few interesting corners:
+ *
+ * create file X in dir Y
+ * link file X to X.link in dir Y
+ * fsync file X
+ * unlink file X but leave X.link
+ * fsync dir Y
+ *
+ * After a crash we would expect only X.link to exist.  But file X
+ * didn't get fsync'd again so the log has back refs for X and X.link.
+ *
+ * We solve this by removing directory entries and inode backrefs from the
+ * log when a file that was logged in the current transaction is
+ * unlinked.  Any later fsync will include the updated log entries, and
+ * we'll be able to reconstruct the proper directory items from backrefs.
+ *
+ * This optimization allows us to avoid relogging the entire inode
+ * or the entire directory.
+ */
+int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 const char *name, int name_len,
+				 struct inode *dir, u64 index)
+{
+	struct btrfs_root *log;
+	struct btrfs_dir_item *di;
+	struct btrfs_path *path;
+	int ret;
+	int bytes_del = 0;
+
+	if (BTRFS_I(dir)->logged_trans < trans->transid)
+		return 0;
+
+	ret = join_running_log_trans(root);
+	if (ret)
+		return 0;
+
+	mutex_lock(&BTRFS_I(dir)->log_mutex);
+
+	log = root->log_root;
+	path = btrfs_alloc_path();
+	di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
+				   name, name_len, -1);
+	if (di && !IS_ERR(di)) {
+		ret = btrfs_delete_one_dir_name(trans, log, path, di);
+		bytes_del += name_len;
+		BUG_ON(ret);
+	}
+	btrfs_release_path(log, path);
+	di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino,
+					 index, name, name_len, -1);
+	if (di && !IS_ERR(di)) {
+		ret = btrfs_delete_one_dir_name(trans, log, path, di);
+		bytes_del += name_len;
+		BUG_ON(ret);
+	}
+
+	/* update the directory size in the log to reflect the names
+	 * we have removed
+	 */
+	if (bytes_del) {
+		struct btrfs_key key;
+
+		key.objectid = dir->i_ino;
+		key.offset = 0;
+		key.type = BTRFS_INODE_ITEM_KEY;
+		btrfs_release_path(log, path);
+
+		ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
+		if (ret == 0) {
+			struct btrfs_inode_item *item;
+			u64 i_size;
+
+			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					      struct btrfs_inode_item);
+			i_size = btrfs_inode_size(path->nodes[0], item);
+			if (i_size > bytes_del)
+				i_size -= bytes_del;
+			else
+				i_size = 0;
+			btrfs_set_inode_size(path->nodes[0], item, i_size);
+			btrfs_mark_buffer_dirty(path->nodes[0]);
+		} else
+			ret = 0;
+		btrfs_release_path(log, path);
+	}
+
+	btrfs_free_path(path);
+	mutex_unlock(&BTRFS_I(dir)->log_mutex);
+	end_log_trans(root);
+
+	return 0;
+}
+
+/* see comments for btrfs_del_dir_entries_in_log */
+int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       const char *name, int name_len,
+			       struct inode *inode, u64 dirid)
+{
+	struct btrfs_root *log;
+	u64 index;
+	int ret;
+
+	if (BTRFS_I(inode)->logged_trans < trans->transid)
+		return 0;
+
+	ret = join_running_log_trans(root);
+	if (ret)
+		return 0;
+	log = root->log_root;
+	mutex_lock(&BTRFS_I(inode)->log_mutex);
+
+	ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
+				  dirid, &index);
+	mutex_unlock(&BTRFS_I(inode)->log_mutex);
+	end_log_trans(root);
+
+	return ret;
+}
+
+/*
+ * creates a range item in the log for 'dirid'.  first_offset and
+ * last_offset tell us which parts of the key space the log should
+ * be considered authoritative for.
+ */
+static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *log,
+				       struct btrfs_path *path,
+				       int key_type, u64 dirid,
+				       u64 first_offset, u64 last_offset)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_dir_log_item *item;
+
+	key.objectid = dirid;
+	key.offset = first_offset;
+	if (key_type == BTRFS_DIR_ITEM_KEY)
+		key.type = BTRFS_DIR_LOG_ITEM_KEY;
+	else
+		key.type = BTRFS_DIR_LOG_INDEX_KEY;
+	ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
+	BUG_ON(ret);
+
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_dir_log_item);
+	btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+	btrfs_release_path(log, path);
+	return 0;
+}
+
+/*
+ * log all the items included in the current transaction for a given
+ * directory.  This also creates the range items in the log tree required
+ * to replay anything deleted before the fsync
+ */
+static noinline int log_dir_items(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct inode *inode,
+			  struct btrfs_path *path,
+			  struct btrfs_path *dst_path, int key_type,
+			  u64 min_offset, u64 *last_offset_ret)
+{
+	struct btrfs_key min_key;
+	struct btrfs_key max_key;
+	struct btrfs_root *log = root->log_root;
+	struct extent_buffer *src;
+	int ret;
+	int i;
+	int nritems;
+	u64 first_offset = min_offset;
+	u64 last_offset = (u64)-1;
+
+	log = root->log_root;
+	max_key.objectid = inode->i_ino;
+	max_key.offset = (u64)-1;
+	max_key.type = key_type;
+
+	min_key.objectid = inode->i_ino;
+	min_key.type = key_type;
+	min_key.offset = min_offset;
+
+	path->keep_locks = 1;
+
+	ret = btrfs_search_forward(root, &min_key, &max_key,
+				   path, 0, trans->transid);
+
+	/*
+	 * we didn't find anything from this transaction, see if there
+	 * is anything at all
+	 */
+	if (ret != 0 || min_key.objectid != inode->i_ino ||
+	    min_key.type != key_type) {
+		min_key.objectid = inode->i_ino;
+		min_key.type = key_type;
+		min_key.offset = (u64)-1;
+		btrfs_release_path(root, path);
+		ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
+		if (ret < 0) {
+			btrfs_release_path(root, path);
+			return ret;
+		}
+		ret = btrfs_previous_item(root, path, inode->i_ino, key_type);
+
+		/* if ret == 0 there are items for this type,
+		 * create a range to tell us the last key of this type.
+		 * otherwise, there are no items in this directory after
+		 * *min_offset, and we create a range to indicate that.
+		 */
+		if (ret == 0) {
+			struct btrfs_key tmp;
+			btrfs_item_key_to_cpu(path->nodes[0], &tmp,
+					      path->slots[0]);
+			if (key_type == tmp.type) {
+				first_offset = max(min_offset, tmp.offset) + 1;
+			}
+		}
+		goto done;
+	}
+
+	/* go backward to find any previous key */
+	ret = btrfs_previous_item(root, path, inode->i_ino, key_type);
+	if (ret == 0) {
+		struct btrfs_key tmp;
+		btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
+		if (key_type == tmp.type) {
+			first_offset = tmp.offset;
+			ret = overwrite_item(trans, log, dst_path,
+					     path->nodes[0], path->slots[0],
+					     &tmp);
+		}
+	}
+	btrfs_release_path(root, path);
+
+	/* find the first key from this transaction again */
+	ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
+	if (ret != 0) {
+		WARN_ON(1);
+		goto done;
+	}
+
+	/*
+	 * we have a block from this transaction, log every item in it
+	 * from our directory
+	 */
+	while(1) {
+		struct btrfs_key tmp;
+		src = path->nodes[0];
+		nritems = btrfs_header_nritems(src);
+		for (i = path->slots[0]; i < nritems; i++) {
+			btrfs_item_key_to_cpu(src, &min_key, i);
+
+			if (min_key.objectid != inode->i_ino ||
+			    min_key.type != key_type)
+				goto done;
+			ret = overwrite_item(trans, log, dst_path, src, i,
+					     &min_key);
+			BUG_ON(ret);
+		}
+		path->slots[0] = nritems;
+
+		/*
+		 * look ahead to the next item and see if it is also
+		 * from this directory and from this transaction
+		 */
+		ret = btrfs_next_leaf(root, path);
+		if (ret == 1) {
+			last_offset = (u64)-1;
+			goto done;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
+		if (tmp.objectid != inode->i_ino || tmp.type != key_type) {
+			last_offset = (u64)-1;
+			goto done;
+		}
+		if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
+			ret = overwrite_item(trans, log, dst_path,
+					     path->nodes[0], path->slots[0],
+					     &tmp);
+
+			BUG_ON(ret);
+			last_offset = tmp.offset;
+			goto done;
+		}
+	}
+done:
+	*last_offset_ret = last_offset;
+	btrfs_release_path(root, path);
+	btrfs_release_path(log, dst_path);
+
+	/* insert the log range keys to indicate where the log is valid */
+	ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino,
+				 first_offset, last_offset);
+	BUG_ON(ret);
+	return 0;
+}
+
+/*
+ * logging directories is very similar to logging inodes.  We find all the items
+ * from the current transaction and write them to the log.
+ *
+ * The recovery code scans the directory in the subvolume, and if it finds a
+ * key in the range logged that is not present in the log tree, then it means
+ * that dir entry was unlinked during the transaction.
+ *
+ * In order for that scan to work, we must include one key smaller than
+ * the smallest logged by this transaction and one key larger than the largest
+ * key logged by this transaction.
+ */
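+/*
+ * For example (offsets are hypothetical): if this transaction touched
+ * index keys 4 through 7, log_dir_items() also copies the neighbouring
+ * untouched entries on either side and writes a dir log range item
+ * spanning them.  Replay can then tell that an index key 5 present in the
+ * subvolume but missing from the log was unlinked before the fsync, while
+ * keys outside the range are simply not the log's business.
+ */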
+static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct inode *inode,
+			  struct btrfs_path *path,
+			  struct btrfs_path *dst_path)
+{
+	u64 min_key;
+	u64 max_key;
+	int ret;
+	int key_type = BTRFS_DIR_ITEM_KEY;
+
+again:
+	min_key = 0;
+	max_key = 0;
+	while(1) {
+		ret = log_dir_items(trans, root, inode, path,
+				    dst_path, key_type, min_key,
+				    &max_key);
+		BUG_ON(ret);
+		if (max_key == (u64)-1)
+			break;
+		min_key = max_key + 1;
+	}
+
+	if (key_type == BTRFS_DIR_ITEM_KEY) {
+		key_type = BTRFS_DIR_INDEX_KEY;
+		goto again;
+	}
+	return 0;
+}
+
+/*
+ * a helper function to drop items from the log before we relog an
+ * inode.  max_key_type indicates the highest item type to remove.
+ * This cannot be run for file data extents because it does not
+ * free the extents they point to.
+ */
+static int drop_objectid_items(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *log,
+				  struct btrfs_path *path,
+				  u64 objectid, int max_key_type)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+
+	key.objectid = objectid;
+	key.type = max_key_type;
+	key.offset = (u64)-1;
+
+	while(1) {
+		ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
+
+		if (ret != 1)
+			break;
+
+		if (path->slots[0] == 0)
+			break;
+
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+
+		if (found_key.objectid != objectid)
+			break;
+
+		ret = btrfs_del_item(trans, log, path);
+		BUG_ON(ret);
+		btrfs_release_path(log, path);
+	}
+	btrfs_release_path(log, path);
+	return 0;
+}
+
+static noinline int copy_items(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *log,
+			       struct btrfs_path *dst_path,
+			       struct extent_buffer *src,
+			       int start_slot, int nr, int inode_only)
+{
+	unsigned long src_offset;
+	unsigned long dst_offset;
+	struct btrfs_file_extent_item *extent;
+	struct btrfs_inode_item *inode_item;
+	int ret;
+	struct btrfs_key *ins_keys;
+	u32 *ins_sizes;
+	char *ins_data;
+	int i;
+
+	ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
+			   nr * sizeof(u32), GFP_NOFS);
+	ins_sizes = (u32 *)ins_data;
+	ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
+
+	for (i = 0; i < nr; i++) {
+		ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
+		btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
+	}
+	ret = btrfs_insert_empty_items(trans, log, dst_path,
+				       ins_keys, ins_sizes, nr);
+	BUG_ON(ret);
+
+	for (i = 0; i < nr; i++) {
+		dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
+						   dst_path->slots[0]);
+
+		src_offset = btrfs_item_ptr_offset(src, start_slot + i);
+
+		copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
+				   src_offset, ins_sizes[i]);
+
+		if (inode_only == LOG_INODE_EXISTS &&
+		    ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
+			inode_item = btrfs_item_ptr(dst_path->nodes[0],
+						    dst_path->slots[0],
+						    struct btrfs_inode_item);
+			btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0);
+
+			/* set the generation to zero so the recovery code
+			 * can tell the difference between a log entry made
+			 * just to say 'this inode exists' and one meant
+			 * to say 'update this inode with these values'
+			 */
+			btrfs_set_inode_generation(dst_path->nodes[0],
+						   inode_item, 0);
+		}
+		/* take a reference on file data extents so that truncates
+		 * or deletes of this inode don't have to relog the inode
+		 * again
+		 */
+		if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) {
+			int found_type;
+			extent = btrfs_item_ptr(src, start_slot + i,
+						struct btrfs_file_extent_item);
+
+			found_type = btrfs_file_extent_type(src, extent);
+			if (found_type == BTRFS_FILE_EXTENT_REG) {
+				u64 ds = btrfs_file_extent_disk_bytenr(src,
+								   extent);
+				u64 dl = btrfs_file_extent_disk_num_bytes(src,
+								      extent);
+				/* ds == 0 is a hole */
+				if (ds != 0) {
+					ret = btrfs_inc_extent_ref(trans, log,
+						   ds, dl,
+						   dst_path->nodes[0]->start,
+						   BTRFS_TREE_LOG_OBJECTID,
+						   trans->transid,
+						   ins_keys[i].objectid);
+					BUG_ON(ret);
+				}
+			}
+		}
+		dst_path->slots[0]++;
+	}
+
+	btrfs_mark_buffer_dirty(dst_path->nodes[0]);
+	btrfs_release_path(log, dst_path);
+	kfree(ins_data);
+	return 0;
+}
+
+/* log a single inode in the tree log.
+ * At least one parent directory for this inode must exist in the tree
+ * or be logged already.
+ *
+ * Any items from this inode changed by the current transaction are copied
+ * to the log tree.  An extra reference is taken on any extents in this
+ * file, allowing us to avoid a whole pile of corner cases around logging
+ * blocks that have been removed from the tree.
+ *
+ * See LOG_INODE_ALL and related defines for a description of what inode_only
+ * does.
+ *
+ * This handles both files and directories.
+ */
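+/*
+ * Roughly speaking: with LOG_INODE_EXISTS the copy stops at
+ * BTRFS_XATTR_ITEM_KEY, so only the inode item, its backrefs and xattrs
+ * go into the log.  With LOG_INODE_ALL a regular file gets every item
+ * copied (including file extents), and a directory additionally has its
+ * entries logged through log_directory_changes().
+ */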
+static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, struct inode *inode,
+			     int inode_only)
+{
+	struct btrfs_path *path;
+	struct btrfs_path *dst_path;
+	struct btrfs_key min_key;
+	struct btrfs_key max_key;
+	struct btrfs_root *log = root->log_root;
+	struct extent_buffer *src = NULL;
+	u32 size;
+	int ret;
+	int nritems;
+	int ins_start_slot = 0;
+	int ins_nr;
+
+	log = root->log_root;
+
+	path = btrfs_alloc_path();
+	dst_path = btrfs_alloc_path();
+
+	min_key.objectid = inode->i_ino;
+	min_key.type = BTRFS_INODE_ITEM_KEY;
+	min_key.offset = 0;
+
+	max_key.objectid = inode->i_ino;
+	if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
+		max_key.type = BTRFS_XATTR_ITEM_KEY;
+	else
+		max_key.type = (u8)-1;
+	max_key.offset = (u64)-1;
+
+	/*
+	 * if this inode has already been logged and we're in inode_only
+	 * mode, we don't want to delete the things that have already
+	 * been written to the log.
+	 *
+	 * But, if the inode has been through an inode_only log,
+	 * the logged_trans field is not set.  This allows us to catch
+	 * any new names for this inode in the backrefs by logging it
+	 * again
+	 */
+	if (inode_only == LOG_INODE_EXISTS &&
+	    BTRFS_I(inode)->logged_trans == trans->transid) {
+		btrfs_free_path(path);
+		btrfs_free_path(dst_path);
+		goto out;
+	}
+	mutex_lock(&BTRFS_I(inode)->log_mutex);
+
+	/*
+	 * a brute force approach to making sure we get the most uptodate
+	 * copies of everything.
+	 */
+	if (S_ISDIR(inode->i_mode)) {
+		int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
+
+		if (inode_only == LOG_INODE_EXISTS)
+			max_key_type = BTRFS_XATTR_ITEM_KEY;
+		ret = drop_objectid_items(trans, log, path,
+					  inode->i_ino, max_key_type);
+	} else {
+		ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
+	}
+	BUG_ON(ret);
+	path->keep_locks = 1;
+
+	while(1) {
+		ins_nr = 0;
+		ret = btrfs_search_forward(root, &min_key, &max_key,
+					   path, 0, trans->transid);
+		if (ret != 0)
+			break;
+again:
+		/* note, ins_nr might be > 0 here, cleanup outside the loop */
+		if (min_key.objectid != inode->i_ino)
+			break;
+		if (min_key.type > max_key.type)
+			break;
+
+		src = path->nodes[0];
+		size = btrfs_item_size_nr(src, path->slots[0]);
+		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
+			ins_nr++;
+			goto next_slot;
+		} else if (!ins_nr) {
+			ins_start_slot = path->slots[0];
+			ins_nr = 1;
+			goto next_slot;
+		}
+
+		ret = copy_items(trans, log, dst_path, src, ins_start_slot,
+				 ins_nr, inode_only);
+		BUG_ON(ret);
+		ins_nr = 1;
+		ins_start_slot = path->slots[0];
+next_slot:
+
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		path->slots[0]++;
+		if (path->slots[0] < nritems) {
+			btrfs_item_key_to_cpu(path->nodes[0], &min_key,
+					      path->slots[0]);
+			goto again;
+		}
+		if (ins_nr) {
+			ret = copy_items(trans, log, dst_path, src,
+					 ins_start_slot,
+					 ins_nr, inode_only);
+			BUG_ON(ret);
+			ins_nr = 0;
+		}
+		btrfs_release_path(root, path);
+
+		if (min_key.offset < (u64)-1)
+			min_key.offset++;
+		else if (min_key.type < (u8)-1)
+			min_key.type++;
+		else if (min_key.objectid < (u64)-1)
+			min_key.objectid++;
+		else
+			break;
+	}
+	if (ins_nr) {
+		ret = copy_items(trans, log, dst_path, src,
+				 ins_start_slot,
+				 ins_nr, inode_only);
+		BUG_ON(ret);
+		ins_nr = 0;
+	}
+	WARN_ON(ins_nr);
+	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
+		btrfs_release_path(root, path);
+		btrfs_release_path(log, dst_path);
+		BTRFS_I(inode)->log_dirty_trans = 0;
+		ret = log_directory_changes(trans, root, inode, path, dst_path);
+		BUG_ON(ret);
+	}
+	BTRFS_I(inode)->logged_trans = trans->transid;
+	mutex_unlock(&BTRFS_I(inode)->log_mutex);
+
+	btrfs_free_path(path);
+	btrfs_free_path(dst_path);
+
+	mutex_lock(&root->fs_info->tree_log_mutex);
+	ret = update_log_root(trans, log);
+	BUG_ON(ret);
+	mutex_unlock(&root->fs_info->tree_log_mutex);
+out:
+	return 0;
+}
+
+int btrfs_log_inode(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct inode *inode,
+		    int inode_only)
+{
+	int ret;
+
+	start_log_trans(trans, root);
+	ret = __btrfs_log_inode(trans, root, inode, inode_only);
+	end_log_trans(root);
+	return ret;
+}
+
+/*
+ * helper function around btrfs_log_inode to make sure newly created
+ * parent directories also end up in the log.  Only a minimal inode and
+ * backref log is done for any parent directories that are newer than
+ * the last committed transaction
+ */
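+/*
+ * Example (path is hypothetical): an fsync of /mnt/dir/file logs the
+ * inode for 'file' with LOG_INODE_ALL, then walks up and logs 'dir' (and
+ * any further ancestors created in this transaction) with
+ * LOG_INODE_EXISTS only, stopping once a parent's generation is no newer
+ * than the last committed transaction.
+ */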
+int btrfs_log_dentry(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct dentry *dentry)
+{
+	int inode_only = LOG_INODE_ALL;
+	struct super_block *sb;
+	int ret;
+
+	start_log_trans(trans, root);
+	sb = dentry->d_inode->i_sb;
+	while(1) {
+		ret = __btrfs_log_inode(trans, root, dentry->d_inode,
+					inode_only);
+		BUG_ON(ret);
+		inode_only = LOG_INODE_EXISTS;
+
+		dentry = dentry->d_parent;
+		if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb)
+			break;
+
+		if (BTRFS_I(dentry->d_inode)->generation <=
+		    root->fs_info->last_trans_committed)
+			break;
+	}
+	end_log_trans(root);
+	return 0;
+}
+
+/*
+ * it is not safe to log dentry if the chunk root has added new
+ * chunks.  This returns 0 if the dentry was logged, and 1 otherwise.
+ * If this returns 1, you must commit the transaction to safely get your
+ * data on disk.
+ */
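+/*
+ * The expected fsync-path usage is roughly (a sketch, not the verbatim
+ * caller):
+ *
+ *	ret = btrfs_log_dentry_safe(trans, root, dentry);
+ *	if (ret > 0)
+ *		btrfs_commit_transaction(trans, root);
+ *	else
+ *		btrfs_sync_log(trans, root);
+ *
+ * i.e. when new chunks make the log unsafe, fall back to a full
+ * transaction commit to get the data on disk.
+ */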
+int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct dentry *dentry)
+{
+	u64 gen;
+	gen = root->fs_info->last_trans_new_blockgroup;
+	if (gen > root->fs_info->last_trans_committed)
+		return 1;
+	else
+		return btrfs_log_dentry(trans, root, dentry);
+}
+
+/*
+ * should be called during mount to recover and replay any log trees
+ * from the FS
+ */
+int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_key tmp_key;
+	struct btrfs_root *log;
+	struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
+	u64 highest_inode;
+	struct walk_control wc = {
+		.process_func = process_one_buffer,
+		.stage = 0,
+	};
+
+	fs_info->log_root_recovering = 1;
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	trans = btrfs_start_transaction(fs_info->tree_root, 1);
+
+	wc.trans = trans;
+	wc.pin = 1;
+
+	walk_log_tree(trans, log_root_tree, &wc);
+
+again:
+	key.objectid = BTRFS_TREE_LOG_OBJECTID;
+	key.offset = (u64)-1;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+
+	while(1) {
+		ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
+		if (ret < 0)
+			break;
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		btrfs_release_path(log_root_tree, path);
+		if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+			break;
+
+		log = btrfs_read_fs_root_no_radix(log_root_tree,
+						  &found_key);
+		BUG_ON(!log);
+
+		tmp_key.objectid = found_key.offset;
+		tmp_key.type = BTRFS_ROOT_ITEM_KEY;
+		tmp_key.offset = (u64)-1;
+
+		wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
+
+		BUG_ON(!wc.replay_dest);
+
+		btrfs_record_root_in_trans(wc.replay_dest);
+		ret = walk_log_tree(trans, log, &wc);
+		BUG_ON(ret);
+
+		if (wc.stage == LOG_WALK_REPLAY_ALL) {
+			ret = fixup_inode_link_counts(trans, wc.replay_dest,
+						      path);
+			BUG_ON(ret);
+		}
+		ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
+		if (ret == 0) {
+			wc.replay_dest->highest_inode = highest_inode;
+			wc.replay_dest->last_inode_alloc = highest_inode;
+		}
+
+		key.offset = found_key.offset - 1;
+		free_extent_buffer(log->node);
+		kfree(log);
+
+		if (found_key.offset == 0)
+			break;
+	}
+	btrfs_release_path(log_root_tree, path);
+
+	/* step one is to pin it all, step two is to replay just inodes */
+	if (wc.pin) {
+		wc.pin = 0;
+		wc.process_func = replay_one_buffer;
+		wc.stage = LOG_WALK_REPLAY_INODES;
+		goto again;
+	}
+	/* step three is to replay everything */
+	if (wc.stage < LOG_WALK_REPLAY_ALL) {
+		wc.stage++;
+		goto again;
+	}
+
+	btrfs_free_path(path);
+
+	free_extent_buffer(log_root_tree->node);
+	log_root_tree->log_root = NULL;
+	fs_info->log_root_recovering = 0;
+
+	/* step 4: commit the transaction, which also unpins the blocks */
+	btrfs_commit_transaction(trans, fs_info->tree_root);
+
+	kfree(log_root_tree);
+	return 0;
+}

+ 41 - 0
fs/btrfs/tree-log.h

@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __TREE_LOG_
+#define __TREE_LOG_
+
+int btrfs_sync_log(struct btrfs_trans_handle *trans,
+		   struct btrfs_root *root);
+int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
+int btrfs_log_dentry(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct dentry *dentry);
+int btrfs_recover_log_trees(struct btrfs_root *tree_root);
+int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, struct dentry *dentry);
+int btrfs_log_inode(struct btrfs_trans_handle *trans,
+		    struct btrfs_root *root, struct inode *inode,
+		    int inode_only);
+int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 const char *name, int name_len,
+				 struct inode *dir, u64 index);
+int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       const char *name, int name_len,
+			       struct inode *inode, u64 dirid);
+#endif

+ 4 - 0
fs/btrfs/version.h

@@ -0,0 +1,4 @@
+#ifndef __BTRFS_VERSION_H
+#define __BTRFS_VERSION_H
+#define BTRFS_BUILD_VERSION "Btrfs"
+#endif

+ 43 - 0
fs/btrfs/version.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# determine-version -- report a useful version for releases
+#
+# Copyright 2008, Aron Griffis <agriffis@n01se.net>
+# Copyright 2008, Oracle
+# Released under the GNU GPLv2
+ 
+v="v0.16"
+
+if [ -d .hg ] && which hg > /dev/null 2>&1; then
+	last=$(hg tags | grep -m1 -o '^v[0-9.]\+')
+	 
+	# now check if the repo has commits since then...
+	if [[ $(hg id -t) == $last || \
+	    $(hg di -r "$last:." | awk '/^diff/{print $NF}' | sort -u) == .hgtags ]]
+	then
+	    # check if it's dirty
+	    if [[ $(hg id | cut -d' ' -f1) == *+ ]]; then
+		v=$last+
+	    else
+		v=$last
+	    fi
+	else
+	    # includes dirty flag
+	    v=$last+$(hg id -i)
+	fi
+fi
+ 
+echo "#ifndef __BUILD_VERSION" > .build-version.h
+echo "#define __BUILD_VERSION" >> .build-version.h
+echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h
+echo "#endif" >> .build-version.h
+
+diff -q version.h .build-version.h >& /dev/null
+
+if [ $? == 0 ]; then
+    rm .build-version.h
+    exit 0
+fi
+
+mv .build-version.h version.h

+ 2549 - 0
fs/btrfs/volumes.c

@@ -0,0 +1,2549 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/sched.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
+#include <linux/random.h>
+#include <asm/div64.h>
+#include "ctree.h"
+#include "extent_map.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "volumes.h"
+#include "async-thread.h"
+
+struct map_lookup {
+	u64 type;
+	int io_align;
+	int io_width;
+	int stripe_len;
+	int sector_size;
+	int num_stripes;
+	int sub_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
+#define map_lookup_size(n) (sizeof(struct map_lookup) + \
+			    (sizeof(struct btrfs_bio_stripe) * (n)))
+
+static DEFINE_MUTEX(uuid_mutex);
+static LIST_HEAD(fs_uuids);
+
+void btrfs_lock_volumes(void)
+{
+	mutex_lock(&uuid_mutex);
+}
+
+void btrfs_unlock_volumes(void)
+{
+	mutex_unlock(&uuid_mutex);
+}
+
+static void lock_chunks(struct btrfs_root *root)
+{
+	mutex_lock(&root->fs_info->alloc_mutex);
+	mutex_lock(&root->fs_info->chunk_mutex);
+}
+
+static void unlock_chunks(struct btrfs_root *root)
+{
+	mutex_unlock(&root->fs_info->chunk_mutex);
+	mutex_unlock(&root->fs_info->alloc_mutex);
+}
+
+int btrfs_cleanup_fs_uuids(void)
+{
+	struct btrfs_fs_devices *fs_devices;
+	struct list_head *uuid_cur;
+	struct list_head *devices_cur;
+	struct btrfs_device *dev;
+
+	list_for_each(uuid_cur, &fs_uuids) {
+		fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
+					list);
+		while(!list_empty(&fs_devices->devices)) {
+			devices_cur = fs_devices->devices.next;
+			dev = list_entry(devices_cur, struct btrfs_device,
+					 dev_list);
+			if (dev->bdev) {
+				close_bdev_excl(dev->bdev);
+				fs_devices->open_devices--;
+			}
+			list_del(&dev->dev_list);
+			kfree(dev->name);
+			kfree(dev);
+		}
+	}
+	return 0;
+}
+
+static noinline struct btrfs_device *__find_device(struct list_head *head,
+						   u64 devid, u8 *uuid)
+{
+	struct btrfs_device *dev;
+	struct list_head *cur;
+
+	list_for_each(cur, head) {
+		dev = list_entry(cur, struct btrfs_device, dev_list);
+		if (dev->devid == devid &&
+		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
+			return dev;
+		}
+	}
+	return NULL;
+}
+
+static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
+{
+	struct list_head *cur;
+	struct btrfs_fs_devices *fs_devices;
+
+	list_for_each(cur, &fs_uuids) {
+		fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
+		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
+			return fs_devices;
+	}
+	return NULL;
+}
+
+/*
+ * we try to collect pending bios for a device so we don't get a large
+ * number of procs sending bios down to the same device.  This greatly
+ * improves the scheduler's ability to collect and merge the bios.
+ *
+ * But, it also turns into a long list of bios to process and that is sure
+ * to eventually make the worker thread block.  The solution here is to
+ * make some progress and then put this work struct back at the end of
+ * the list if the block device is congested.  This way, multiple devices
+ * can make progress from a single worker thread.
+ */
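+/*
+ * Behavioural sketch (numbers are made up): with 200 bios queued on a
+ * device, if the backing device reports write congestion after the first
+ * 50 are submitted, the remaining 150 are spliced back onto
+ * device->pending_bios and the work struct is requeued with
+ * btrfs_requeue_work(), so the worker thread can service other devices
+ * before returning to this one.
+ */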
+static int noinline run_scheduled_bios(struct btrfs_device *device)
+{
+	struct bio *pending;
+	struct backing_dev_info *bdi;
+	struct btrfs_fs_info *fs_info;
+	struct bio *tail;
+	struct bio *cur;
+	int again = 0;
+	unsigned long num_run = 0;
+	unsigned long limit;
+
+	bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+	fs_info = device->dev_root->fs_info;
+	limit = btrfs_async_submit_limit(fs_info);
+	limit = limit * 2 / 3;
+
+loop:
+	spin_lock(&device->io_lock);
+
+	/* take all the bios off the list at once and process them
+	 * later on (without the lock held).  But, remember the
+	 * tail and other pointers so the bios can be properly reinserted
+	 * into the list if we hit congestion
+	 */
+	pending = device->pending_bios;
+	tail = device->pending_bio_tail;
+	WARN_ON(pending && !tail);
+	device->pending_bios = NULL;
+	device->pending_bio_tail = NULL;
+
+	/*
+	 * if pending was null this time around, no bios need processing
+	 * at all and we can stop.  Otherwise it'll loop back up again
+	 * and do an additional check so no bios are missed.
+	 *
+	 * device->running_pending is used to synchronize with the
+	 * schedule_bio code.
+	 */
+	if (pending) {
+		again = 1;
+		device->running_pending = 1;
+	} else {
+		again = 0;
+		device->running_pending = 0;
+	}
+	spin_unlock(&device->io_lock);
+
+	while(pending) {
+		cur = pending;
+		pending = pending->bi_next;
+		cur->bi_next = NULL;
+		atomic_dec(&fs_info->nr_async_bios);
+
+		if (atomic_read(&fs_info->nr_async_bios) < limit &&
+		    waitqueue_active(&fs_info->async_submit_wait))
+			wake_up(&fs_info->async_submit_wait);
+
+		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+		bio_get(cur);
+		submit_bio(cur->bi_rw, cur);
+		bio_put(cur);
+		num_run++;
+
+		/*
+		 * we made progress, there is more work to do and the bdi
+		 * is now congested.  Back off and let other work structs
+		 * run instead
+		 */
+		if (pending && bdi_write_congested(bdi)) {
+			struct bio *old_head;
+
+			spin_lock(&device->io_lock);
+
+			old_head = device->pending_bios;
+			device->pending_bios = pending;
+			if (device->pending_bio_tail)
+				tail->bi_next = old_head;
+			else
+				device->pending_bio_tail = tail;
+
+			spin_unlock(&device->io_lock);
+			btrfs_requeue_work(&device->work);
+			goto done;
+		}
+	}
+	if (again)
+		goto loop;
+done:
+	return 0;
+}
+
+void pending_bios_fn(struct btrfs_work *work)
+{
+	struct btrfs_device *device;
+
+	device = container_of(work, struct btrfs_device, work);
+	run_scheduled_bios(device);
+}
+
+static noinline int device_list_add(const char *path,
+			   struct btrfs_super_block *disk_super,
+			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
+{
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices;
+	u64 found_transid = btrfs_super_generation(disk_super);
+
+	fs_devices = find_fsid(disk_super->fsid);
+	if (!fs_devices) {
+		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+		if (!fs_devices)
+			return -ENOMEM;
+		INIT_LIST_HEAD(&fs_devices->devices);
+		INIT_LIST_HEAD(&fs_devices->alloc_list);
+		list_add(&fs_devices->list, &fs_uuids);
+		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+		device = NULL;
+	} else {
+		device = __find_device(&fs_devices->devices, devid,
+				       disk_super->dev_item.uuid);
+	}
+	if (!device) {
+		device = kzalloc(sizeof(*device), GFP_NOFS);
+		if (!device) {
+			/* we can safely leave the fs_devices entry around */
+			return -ENOMEM;
+		}
+		device->devid = devid;
+		device->work.func = pending_bios_fn;
+		memcpy(device->uuid, disk_super->dev_item.uuid,
+		       BTRFS_UUID_SIZE);
+		device->barriers = 1;
+		spin_lock_init(&device->io_lock);
+		device->name = kstrdup(path, GFP_NOFS);
+		if (!device->name) {
+			kfree(device);
+			return -ENOMEM;
+		}
+		list_add(&device->dev_list, &fs_devices->devices);
+		list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+		fs_devices->num_devices++;
+	}
+
+	if (found_transid > fs_devices->latest_trans) {
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+	}
+	*fs_devices_ret = fs_devices;
+	return 0;
+}
+
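+/*
+ * drop any devices that were scanned but are not referenced by the
+ * filesystem metadata, closing their block devices as we go
+ */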
+int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct list_head *head = &fs_devices->devices;
+	struct list_head *cur;
+	struct btrfs_device *device;
+
+	mutex_lock(&uuid_mutex);
+again:
+	list_for_each(cur, head) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (!device->in_fs_metadata) {
+			struct block_device *bdev;
+			list_del(&device->dev_list);
+			list_del(&device->dev_alloc_list);
+			fs_devices->num_devices--;
+			if (device->bdev) {
+				bdev = device->bdev;
+				fs_devices->open_devices--;
+				mutex_unlock(&uuid_mutex);
+				close_bdev_excl(bdev);
+				mutex_lock(&uuid_mutex);
+			}
+			kfree(device->name);
+			kfree(device);
+			goto again;
+		}
+	}
+	mutex_unlock(&uuid_mutex);
+	return 0;
+}
+
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct list_head *head = &fs_devices->devices;
+	struct list_head *cur;
+	struct btrfs_device *device;
+
+	mutex_lock(&uuid_mutex);
+	list_for_each(cur, head) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (device->bdev) {
+			close_bdev_excl(device->bdev);
+			fs_devices->open_devices--;
+		}
+		device->bdev = NULL;
+		device->in_fs_metadata = 0;
+	}
+	fs_devices->mounted = 0;
+	mutex_unlock(&uuid_mutex);
+	return 0;
+}
+
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+		       int flags, void *holder)
+{
+	struct block_device *bdev;
+	struct list_head *head = &fs_devices->devices;
+	struct list_head *cur;
+	struct btrfs_device *device;
+	struct block_device *latest_bdev = NULL;
+	struct buffer_head *bh;
+	struct btrfs_super_block *disk_super;
+	u64 latest_devid = 0;
+	u64 latest_transid = 0;
+	u64 transid;
+	u64 devid;
+	int ret = 0;
+
+	mutex_lock(&uuid_mutex);
+	if (fs_devices->mounted)
+		goto out;
+
+	list_for_each(cur, head) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (device->bdev)
+			continue;
+
+		if (!device->name)
+			continue;
+
+		bdev = open_bdev_excl(device->name, flags, holder);
+
+		if (IS_ERR(bdev)) {
+			printk("open %s failed\n", device->name);
+			goto error;
+		}
+		set_blocksize(bdev, 4096);
+
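+		/*
+		 * read the primary super block so we can check the magic
+		 * and make sure the devid still matches this device
+		 */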
+		bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+		if (!bh)
+			goto error_close;
+
+		disk_super = (struct btrfs_super_block *)bh->b_data;
+		if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+		    sizeof(disk_super->magic)))
+			goto error_brelse;
+
+		devid = le64_to_cpu(disk_super->dev_item.devid);
+		if (devid != device->devid)
+			goto error_brelse;
+
+		transid = btrfs_super_generation(disk_super);
+		if (!latest_transid || transid > latest_transid) {
+			latest_devid = devid;
+			latest_transid = transid;
+			latest_bdev = bdev;
+		}
+
+		device->bdev = bdev;
+		device->in_fs_metadata = 0;
+		fs_devices->open_devices++;
+		continue;
+
+error_brelse:
+		brelse(bh);
+error_close:
+		close_bdev_excl(bdev);
+error:
+		continue;
+	}
+	if (fs_devices->open_devices == 0) {
+		ret = -EIO;
+		goto out;
+	}
+	fs_devices->mounted = 1;
+	fs_devices->latest_bdev = latest_bdev;
+	fs_devices->latest_devid = latest_devid;
+	fs_devices->latest_trans = latest_transid;
+out:
+	mutex_unlock(&uuid_mutex);
+	return ret;
+}
+
+int btrfs_scan_one_device(const char *path, int flags, void *holder,
+			  struct btrfs_fs_devices **fs_devices_ret)
+{
+	struct btrfs_super_block *disk_super;
+	struct block_device *bdev;
+	struct buffer_head *bh;
+	int ret;
+	u64 devid;
+	u64 transid;
+
+	mutex_lock(&uuid_mutex);
+
+	bdev = open_bdev_excl(path, flags, holder);
+
+	if (IS_ERR(bdev)) {
+		ret = PTR_ERR(bdev);
+		goto error;
+	}
+
+	ret = set_blocksize(bdev, 4096);
+	if (ret)
+		goto error_close;
+	bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+	if (!bh) {
+		ret = -EIO;
+		goto error_close;
+	}
+	disk_super = (struct btrfs_super_block *)bh->b_data;
+	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+	    sizeof(disk_super->magic))) {
+		ret = -EINVAL;
+		goto error_brelse;
+	}
+	devid = le64_to_cpu(disk_super->dev_item.devid);
+	transid = btrfs_super_generation(disk_super);
+	if (disk_super->label[0])
+		printk("device label %s ", disk_super->label);
+	else {
+		/* FIXME, make a real uuid parser */
+		printk("device fsid %llx-%llx ",
+		       *(unsigned long long *)disk_super->fsid,
+		       *(unsigned long long *)(disk_super->fsid + 8));
+	}
+	printk("devid %Lu transid %Lu %s\n", devid, transid, path);
+	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
+
+error_brelse:
+	brelse(bh);
+error_close:
+	close_bdev_excl(bdev);
+error:
+	mutex_unlock(&uuid_mutex);
+	return ret;
+}
+
+/*
+ * this uses a pretty simple search; the expectation is that it is
+ * called very infrequently and that a given device has a small number
+ * of extents
+ */
+static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
+					 struct btrfs_device *device,
+					 struct btrfs_path *path,
+					 u64 num_bytes, u64 *start)
+{
+	struct btrfs_key key;
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_dev_extent *dev_extent = NULL;
+	u64 hole_size = 0;
+	u64 last_byte = 0;
+	u64 search_start = 0;
+	u64 search_end = device->total_bytes;
+	int ret;
+	int slot = 0;
+	int start_found;
+	struct extent_buffer *l;
+
+	start_found = 0;
+	path->reada = 2;
+
+	/* FIXME use last free of some kind */
+
+	/* we don't want to overwrite the superblock on the drive,
+	 * so we make sure to start at an offset of at least 1MB
+	 */
+	search_start = max((u64)1024 * 1024, search_start);
+
+	if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
+		search_start = max(root->fs_info->alloc_start, search_start);
+
+	key.objectid = device->devid;
+	key.offset = search_start;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+	ret = btrfs_previous_item(root, path, 0, key.type);
+	if (ret < 0)
+		goto error;
+	l = path->nodes[0];
+	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+	while (1) {
+		l = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(l)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto error;
+no_more_items:
+			if (!start_found) {
+				if (search_start >= search_end) {
+					ret = -ENOSPC;
+					goto error;
+				}
+				*start = search_start;
+				start_found = 1;
+				goto check_pending;
+			}
+			*start = last_byte > search_start ?
+				last_byte : search_start;
+			if (search_end <= *start) {
+				ret = -ENOSPC;
+				goto error;
+			}
+			goto check_pending;
+		}
+		btrfs_item_key_to_cpu(l, &key, slot);
+
+		if (key.objectid < device->devid)
+			goto next;
+
+		if (key.objectid > device->devid)
+			goto no_more_items;
+
+		if (key.offset >= search_start && key.offset > last_byte &&
+		    start_found) {
+			if (last_byte < search_start)
+				last_byte = search_start;
+			hole_size = key.offset - last_byte;
+			if (key.offset > last_byte &&
+			    hole_size >= num_bytes) {
+				*start = last_byte;
+				goto check_pending;
+			}
+		}
+		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
+			goto next;
+		}
+
+		start_found = 1;
+		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+		last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+next:
+		path->slots[0]++;
+		cond_resched();
+	}
+check_pending:
+	/* we have to make sure we didn't find an extent that has already
+	 * been allocated by the map tree or the original allocation
+	 */
+	btrfs_release_path(root, path);
+	BUG_ON(*start < search_start);
+
+	if (*start + num_bytes > search_end) {
+		ret = -ENOSPC;
+		goto error;
+	}
+	/* check for pending inserts here */
+	return 0;
+
+error:
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
+			  struct btrfs_device *device,
+			  u64 start)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf = NULL;
+	struct btrfs_dev_extent *extent = NULL;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = device->devid;
+	key.offset = start;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0) {
+		ret = btrfs_previous_item(root, path, key.objectid,
+					  BTRFS_DEV_EXTENT_KEY);
+		BUG_ON(ret);
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_dev_extent);
+		BUG_ON(found_key.offset > start || found_key.offset +
+		       btrfs_dev_extent_length(leaf, extent) < start);
+		ret = 0;
+	} else if (ret == 0) {
+		leaf = path->nodes[0];
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_dev_extent);
+	}
+	BUG_ON(ret);
+
+	if (device->bytes_used > 0)
+		device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
+	ret = btrfs_del_item(trans, root, path);
+	BUG_ON(ret);
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+			   struct btrfs_device *device,
+			   u64 chunk_tree, u64 chunk_objectid,
+			   u64 chunk_offset,
+			   u64 num_bytes, u64 *start)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_dev_extent *extent;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+
+	WARN_ON(!device->in_fs_metadata);
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
+	if (ret) {
+		goto err;
+	}
+
+	key.objectid = device->devid;
+	key.offset = *start;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      sizeof(*extent));
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	extent = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_dev_extent);
+	btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
+	btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
+	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+		    (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
+		    BTRFS_UUID_SIZE);
+
+	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
+	btrfs_mark_buffer_dirty(leaf);
+err:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static noinline int find_next_chunk(struct btrfs_root *root,
+				    u64 objectid, u64 *offset)
+{
+	struct btrfs_path *path;
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_chunk *chunk;
+	struct btrfs_key found_key;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	key.objectid = objectid;
+	key.offset = (u64)-1;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+
+	BUG_ON(ret == 0);
+
+	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
+	if (ret) {
+		*offset = 0;
+	} else {
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		if (found_key.objectid != objectid)
+			*offset = 0;
+		else {
+			chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					       struct btrfs_chunk);
+			*offset = found_key.offset +
+				btrfs_chunk_length(path->nodes[0], chunk);
+		}
+	}
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static noinline int find_next_devid(struct btrfs_root *root,
+				    struct btrfs_path *path, u64 *objectid)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+
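+	/*
+	 * search back from the largest possible devid to the last existing
+	 * dev item and hand out the devid right after it
+	 */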
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto error;
+
+	BUG_ON(ret == 0);
+
+	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+				  BTRFS_DEV_ITEM_KEY);
+	if (ret) {
+		*objectid = 1;
+	} else {
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		*objectid = found_key.offset + 1;
+	}
+	ret = 0;
+error:
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+/*
+ * the device information is stored in the chunk root
+ * the btrfs_device struct should be fully filled in
+ */
+int btrfs_add_device(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_device *device)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_dev_item *dev_item;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	unsigned long ptr;
+	u64 free_devid = 0;
+
+	root = root->fs_info->chunk_root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = find_next_devid(root, path, &free_devid);
+	if (ret)
+		goto out;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = free_devid;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      sizeof(*dev_item));
+	if (ret)
+		goto out;
+
+	leaf = path->nodes[0];
+	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
+
+	device->devid = free_devid;
+	btrfs_set_device_id(leaf, dev_item, device->devid);
+	btrfs_set_device_type(leaf, dev_item, device->type);
+	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
+	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
+	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
+	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+	btrfs_set_device_group(leaf, dev_item, 0);
+	btrfs_set_device_seek_speed(leaf, dev_item, 0);
+	btrfs_set_device_bandwidth(leaf, dev_item, 0);
+
+	ptr = (unsigned long)btrfs_device_uuid(dev_item);
+	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+	ret = 0;
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int btrfs_rm_dev_item(struct btrfs_root *root,
+			     struct btrfs_device *device)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct block_device *bdev = device->bdev;
+	struct btrfs_device *next_dev;
+	struct btrfs_key key;
+	u64 total_bytes;
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_trans_handle *trans;
+
+	root = root->fs_info->chunk_root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = device->devid;
+	lock_chunks(root);
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out;
+
+	/*
+	 * at this point, the device is zero sized.  We want to
+	 * remove it from the devices list and zero out the old super
+	 */
+	list_del_init(&device->dev_list);
+	list_del_init(&device->dev_alloc_list);
+	fs_devices = root->fs_info->fs_devices;
+
+	next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
+			      dev_list);
+	if (bdev == root->fs_info->sb->s_bdev)
+		root->fs_info->sb->s_bdev = next_dev->bdev;
+	if (bdev == fs_devices->latest_bdev)
+		fs_devices->latest_bdev = next_dev->bdev;
+
+	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
+	btrfs_set_super_num_devices(&root->fs_info->super_copy,
+				    total_bytes - 1);
+out:
+	btrfs_free_path(path);
+	unlock_chunks(root);
+	btrfs_commit_transaction(trans, root);
+	return ret;
+}
+
+int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+{
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct buffer_head *bh = NULL;
+	struct btrfs_super_block *disk_super;
+	u64 all_avail;
+	u64 devid;
+	int ret = 0;
+
+	mutex_lock(&uuid_mutex);
+	mutex_lock(&root->fs_info->volume_mutex);
+
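+	/*
+	 * refuse the removal if it would leave fewer devices than the
+	 * raid profiles currently in use require
+	 */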
+	all_avail = root->fs_info->avail_data_alloc_bits |
+		root->fs_info->avail_system_alloc_bits |
+		root->fs_info->avail_metadata_alloc_bits;
+
+	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
+	    btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
+		printk("btrfs: unable to go below four devices on raid10\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
+	    btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
+		printk("btrfs: unable to go below two devices on raid1\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (strcmp(device_path, "missing") == 0) {
+		struct list_head *cur;
+		struct list_head *devices;
+		struct btrfs_device *tmp;
+
+		device = NULL;
+		devices = &root->fs_info->fs_devices->devices;
+		list_for_each(cur, devices) {
+			tmp = list_entry(cur, struct btrfs_device, dev_list);
+			if (tmp->in_fs_metadata && !tmp->bdev) {
+				device = tmp;
+				break;
+			}
+		}
+		bdev = NULL;
+		bh = NULL;
+		disk_super = NULL;
+		if (!device) {
+			printk("btrfs: no missing devices found to remove\n");
+			goto out;
+		}
+
+	} else {
+		bdev = open_bdev_excl(device_path, 0,
+				      root->fs_info->bdev_holder);
+		if (IS_ERR(bdev)) {
+			ret = PTR_ERR(bdev);
+			goto out;
+		}
+
+		bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+		if (!bh) {
+			ret = -EIO;
+			goto error_close;
+		}
+		disk_super = (struct btrfs_super_block *)bh->b_data;
+		if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+		    sizeof(disk_super->magic))) {
+			ret = -ENOENT;
+			goto error_brelse;
+		}
+		if (memcmp(disk_super->fsid, root->fs_info->fsid,
+			   BTRFS_FSID_SIZE)) {
+			ret = -ENOENT;
+			goto error_brelse;
+		}
+		devid = le64_to_cpu(disk_super->dev_item.devid);
+		device = btrfs_find_device(root, devid, NULL);
+		if (!device) {
+			ret = -ENOENT;
+			goto error_brelse;
+		}
+
+	}
+	root->fs_info->fs_devices->num_devices--;
+	root->fs_info->fs_devices->open_devices--;
+
+	ret = btrfs_shrink_device(device, 0);
+	if (ret)
+		goto error_brelse;
+
+
+	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
+	if (ret)
+		goto error_brelse;
+
+	if (bh) {
+		/* make sure this device isn't detected as part of
+		 * the FS anymore
+		 */
+		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
+		set_buffer_dirty(bh);
+		sync_dirty_buffer(bh);
+
+		brelse(bh);
+	}
+
+	if (device->bdev) {
+		/* one close for the device struct or super_block */
+		close_bdev_excl(device->bdev);
+	}
+	if (bdev) {
+		/* one close for us */
+		close_bdev_excl(bdev);
+	}
+	kfree(device->name);
+	kfree(device);
+	ret = 0;
+	goto out;
+
+error_brelse:
+	brelse(bh);
+error_close:
+	if (bdev)
+		close_bdev_excl(bdev);
+out:
+	mutex_unlock(&root->fs_info->volume_mutex);
+	mutex_unlock(&uuid_mutex);
+	return ret;
+}
+
+int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct list_head *cur;
+	struct list_head *devices;
+	u64 total_bytes;
+	int ret = 0;
+
+
+	bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+
+	filemap_write_and_wait(bdev->bd_inode->i_mapping);
+	mutex_lock(&root->fs_info->volume_mutex);
+
+	trans = btrfs_start_transaction(root, 1);
+	lock_chunks(root);
+	devices = &root->fs_info->fs_devices->devices;
+	list_for_each(cur, devices) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (device->bdev == bdev) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device) {
+		/* we can safely leave the fs_devices entry around */
+		ret = -ENOMEM;
+		goto out_close_bdev;
+	}
+
+	device->barriers = 1;
+	device->work.func = pending_bios_fn;
+	generate_random_uuid(device->uuid);
+	spin_lock_init(&device->io_lock);
+	device->name = kstrdup(device_path, GFP_NOFS);
+	if (!device->name) {
+		kfree(device);
+		ret = -ENOMEM;
+		goto out_close_bdev;
+	}
+	device->io_width = root->sectorsize;
+	device->io_align = root->sectorsize;
+	device->sector_size = root->sectorsize;
+	device->total_bytes = i_size_read(bdev->bd_inode);
+	device->dev_root = root->fs_info->dev_root;
+	device->bdev = bdev;
+	device->in_fs_metadata = 1;
+
+	ret = btrfs_add_device(trans, root, device);
+	if (ret)
+		goto out_close_bdev;
+
+	set_blocksize(device->bdev, 4096);
+
+	total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	btrfs_set_super_total_bytes(&root->fs_info->super_copy,
+				    total_bytes + device->total_bytes);
+
+	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
+	btrfs_set_super_num_devices(&root->fs_info->super_copy,
+				    total_bytes + 1);
+
+	list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
+	list_add(&device->dev_alloc_list,
+		 &root->fs_info->fs_devices->alloc_list);
+	root->fs_info->fs_devices->num_devices++;
+	root->fs_info->fs_devices->open_devices++;
+out:
+	unlock_chunks(root);
+	btrfs_end_transaction(trans, root);
+	mutex_unlock(&root->fs_info->volume_mutex);
+
+	return ret;
+
+out_close_bdev:
+	close_bdev_excl(bdev);
+	goto out;
+}
+
+int noinline btrfs_update_device(struct btrfs_trans_handle *trans,
+				 struct btrfs_device *device)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_root *root;
+	struct btrfs_dev_item *dev_item;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+
+	root = device->dev_root->fs_info->chunk_root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = device->devid;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
+
+	btrfs_set_device_id(leaf, dev_item, device->devid);
+	btrfs_set_device_type(leaf, dev_item, device->type);
+	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
+	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
+	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
+	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+	btrfs_mark_buffer_dirty(leaf);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size)
+{
+	struct btrfs_super_block *super_copy =
+		&device->dev_root->fs_info->super_copy;
+	u64 old_total = btrfs_super_total_bytes(super_copy);
+	u64 diff = new_size - device->total_bytes;
+
+	btrfs_set_super_total_bytes(super_copy, old_total + diff);
+	return btrfs_update_device(trans, device);
+}
+
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size)
+{
+	int ret;
+	lock_chunks(device->dev_root);
+	ret = __btrfs_grow_device(trans, device, new_size);
+	unlock_chunks(device->dev_root);
+	return ret;
+}
+
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 chunk_tree, u64 chunk_objectid,
+			    u64 chunk_offset)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+
+	root = root->fs_info->chunk_root;
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = chunk_objectid;
+	key.offset = chunk_offset;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	BUG_ON(ret);
+
+	ret = btrfs_del_item(trans, root, path);
+	BUG_ON(ret);
+
+	btrfs_free_path(path);
+	return 0;
+}
+
+int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
+			chunk_offset)
+{
+	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_chunk *chunk;
+	u8 *ptr;
+	int ret = 0;
+	u32 num_stripes;
+	u32 array_size;
+	u32 len = 0;
+	u32 cur;
+	struct btrfs_key key;
+
+	array_size = btrfs_super_sys_array_size(super_copy);
+
+	ptr = super_copy->sys_chunk_array;
+	cur = 0;
+
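+	/*
+	 * scan the packed (disk_key, chunk) pairs in sys_chunk_array and
+	 * slide everything after the matching chunk down over it
+	 */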
+	while (cur < array_size) {
+		disk_key = (struct btrfs_disk_key *)ptr;
+		btrfs_disk_key_to_cpu(&key, disk_key);
+
+		len = sizeof(*disk_key);
+
+		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+			chunk = (struct btrfs_chunk *)(ptr + len);
+			num_stripes = btrfs_stack_chunk_num_stripes(chunk);
+			len += btrfs_chunk_item_size(num_stripes);
+		} else {
+			ret = -EIO;
+			break;
+		}
+		if (key.objectid == chunk_objectid &&
+		    key.offset == chunk_offset) {
+			memmove(ptr, ptr + len, array_size - (cur + len));
+			array_size -= len;
+			btrfs_set_super_sys_array_size(super_copy, array_size);
+		} else {
+			ptr += len;
+			cur += len;
+		}
+	}
+	return ret;
+}
+
+
+int btrfs_relocate_chunk(struct btrfs_root *root,
+			 u64 chunk_tree, u64 chunk_objectid,
+			 u64 chunk_offset)
+{
+	struct extent_map_tree *em_tree;
+	struct btrfs_root *extent_root;
+	struct btrfs_trans_handle *trans;
+	struct extent_map *em;
+	struct map_lookup *map;
+	int ret;
+	int i;
+
+	printk("btrfs relocating chunk %llu\n",
+	       (unsigned long long)chunk_offset);
+	root = root->fs_info->chunk_root;
+	extent_root = root->fs_info->extent_root;
+	em_tree = &root->fs_info->mapping_tree.map_tree;
+
+	/* step one, relocate all the extents inside this chunk */
+	ret = btrfs_relocate_block_group(extent_root, chunk_offset);
+	BUG_ON(ret);
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	lock_chunks(root);
+
+	/*
+	 * step two, delete the device extents and the
+	 * chunk tree entries
+	 */
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+	spin_unlock(&em_tree->lock);
+
+	BUG_ON(em->start > chunk_offset ||
+	       em->start + em->len < chunk_offset);
+	map = (struct map_lookup *)em->bdev;
+
+	for (i = 0; i < map->num_stripes; i++) {
+		ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
+					    map->stripes[i].physical);
+		BUG_ON(ret);
+
+		if (map->stripes[i].dev) {
+			ret = btrfs_update_device(trans, map->stripes[i].dev);
+			BUG_ON(ret);
+		}
+	}
+	ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
+			       chunk_offset);
+
+	BUG_ON(ret);
+
+	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
+		BUG_ON(ret);
+	}
+
+	ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
+	BUG_ON(ret);
+
+	spin_lock(&em_tree->lock);
+	remove_extent_mapping(em_tree, em);
+	spin_unlock(&em_tree->lock);
+
+	kfree(map);
+	em->bdev = NULL;
+
+	/* once for the tree */
+	free_extent_map(em);
+	/* once for us */
+	free_extent_map(em);
+
+	unlock_chunks(root);
+	btrfs_end_transaction(trans, root);
+	return 0;
+}
+
+static u64 div_factor(u64 num, int factor)
+{
+	if (factor == 10)
+		return num;
+	num *= factor;
+	do_div(num, 10);
+	return num;
+}
+
+
+int btrfs_balance(struct btrfs_root *dev_root)
+{
+	int ret;
+	struct list_head *cur;
+	struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
+	struct btrfs_device *device;
+	u64 old_size;
+	u64 size_to_free;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_chunk *chunk;
+	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key found_key;
+
+
+	mutex_lock(&dev_root->fs_info->volume_mutex);
+	dev_root = dev_root->fs_info->dev_root;
+
+	/* step one, make some room on all the devices */
+	list_for_each(cur, devices) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		old_size = device->total_bytes;
+		size_to_free = div_factor(old_size, 1);
+		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
+		if (device->total_bytes - device->bytes_used > size_to_free)
+			continue;
+
+		ret = btrfs_shrink_device(device, old_size - size_to_free);
+		BUG_ON(ret);
+
+		trans = btrfs_start_transaction(dev_root, 1);
+		BUG_ON(!trans);
+
+		ret = btrfs_grow_device(trans, device, old_size);
+		BUG_ON(ret);
+
+		btrfs_end_transaction(trans, dev_root);
+	}
+
+	/* step two, relocate all the chunks */
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.offset = (u64)-1;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+
+	while(1) {
+		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
+		if (ret < 0)
+			goto error;
+
+		/*
+		 * this shouldn't happen; it means the last relocate
+		 * failed
+		 */
+		if (ret == 0)
+			break;
+
+		ret = btrfs_previous_item(chunk_root, path, 0,
+					  BTRFS_CHUNK_ITEM_KEY);
+		if (ret)
+			break;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		if (found_key.objectid != key.objectid)
+			break;
+
+		chunk = btrfs_item_ptr(path->nodes[0],
+				       path->slots[0],
+				       struct btrfs_chunk);
+		key.offset = found_key.offset;
+		/* chunk zero is special */
+		if (key.offset == 0)
+			break;
+
+		btrfs_release_path(chunk_root, path);
+		ret = btrfs_relocate_chunk(chunk_root,
+					   chunk_root->root_key.objectid,
+					   found_key.objectid,
+					   found_key.offset);
+		BUG_ON(ret);
+	}
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	mutex_unlock(&dev_root->fs_info->volume_mutex);
+	return ret;
+}
+
+/*
+ * shrinking a device means finding all of the device extents past
+ * the new size, and then following the back refs to the chunks.
+ * The chunk relocation code actually frees the device extent
+ */
+int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_dev_extent *dev_extent = NULL;
+	struct btrfs_path *path;
+	u64 length;
+	u64 chunk_tree;
+	u64 chunk_objectid;
+	u64 chunk_offset;
+	int ret;
+	int slot;
+	struct extent_buffer *l;
+	struct btrfs_key key;
+	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+	u64 old_total = btrfs_super_total_bytes(super_copy);
+	u64 diff = device->total_bytes - new_size;
+
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	path->reada = 2;
+
+	lock_chunks(root);
+
+	device->total_bytes = new_size;
+	ret = btrfs_update_device(trans, device);
+	if (ret) {
+		unlock_chunks(root);
+		btrfs_end_transaction(trans, root);
+		goto done;
+	}
+	WARN_ON(diff > old_total);
+	btrfs_set_super_total_bytes(super_copy, old_total - diff);
+	unlock_chunks(root);
+	btrfs_end_transaction(trans, root);
+
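+	/*
+	 * walk the device extents from the end of the device and relocate
+	 * every chunk that still owns space beyond the new size
+	 */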
+	key.objectid = device->devid;
+	key.offset = (u64)-1;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+
+	while (1) {
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto done;
+
+		ret = btrfs_previous_item(root, path, 0, key.type);
+		if (ret < 0)
+			goto done;
+		if (ret) {
+			ret = 0;
+			goto done;
+		}
+
+		l = path->nodes[0];
+		slot = path->slots[0];
+		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+
+		if (key.objectid != device->devid)
+			goto done;
+
+		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+		length = btrfs_dev_extent_length(l, dev_extent);
+
+		if (key.offset + length <= new_size)
+			goto done;
+
+		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
+		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
+		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
+		btrfs_release_path(root, path);
+
+		ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
+					   chunk_offset);
+		if (ret)
+			goto done;
+	}
+
+done:
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_key *key,
+			   struct btrfs_chunk *chunk, int item_size)
+{
+	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+	struct btrfs_disk_key disk_key;
+	u32 array_size;
+	u8 *ptr;
+
+	array_size = btrfs_super_sys_array_size(super_copy);
+	if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
+		return -EFBIG;
+
+	ptr = super_copy->sys_chunk_array + array_size;
+	btrfs_cpu_key_to_disk(&disk_key, key);
+	memcpy(ptr, &disk_key, sizeof(disk_key));
+	ptr += sizeof(disk_key);
+	memcpy(ptr, chunk, item_size);
+	item_size += sizeof(disk_key);
+	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
+	return 0;
+}
+
+static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size,
+					int num_stripes, int sub_stripes)
+{
+	if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
+		return calc_size;
+	else if (type & BTRFS_BLOCK_GROUP_RAID10)
+		return calc_size * (num_stripes / sub_stripes);
+	else
+		return calc_size * num_stripes;
+}
+
+
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *extent_root, u64 *start,
+		      u64 *num_bytes, u64 type)
+{
+	u64 dev_offset;
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
+	struct btrfs_path *path;
+	struct btrfs_stripe *stripes;
+	struct btrfs_device *device = NULL;
+	struct btrfs_chunk *chunk;
+	struct list_head private_devs;
+	struct list_head *dev_list;
+	struct list_head *cur;
+	struct extent_map_tree *em_tree;
+	struct map_lookup *map;
+	struct extent_map *em;
+	int min_stripe_size = 1 * 1024 * 1024;
+	u64 physical;
+	u64 calc_size = 1024 * 1024 * 1024;
+	u64 max_chunk_size = calc_size;
+	u64 min_free;
+	u64 avail;
+	u64 max_avail = 0;
+	u64 percent_max;
+	int num_stripes = 1;
+	int min_stripes = 1;
+	int sub_stripes = 0;
+	int looped = 0;
+	int ret;
+	int index;
+	int stripe_len = 64 * 1024;
+	struct btrfs_key key;
+
+	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
+	    (type & BTRFS_BLOCK_GROUP_DUP)) {
+		WARN_ON(1);
+		type &= ~BTRFS_BLOCK_GROUP_DUP;
+	}
+	dev_list = &extent_root->fs_info->fs_devices->alloc_list;
+	if (list_empty(dev_list))
+		return -ENOSPC;
+
+	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
+		num_stripes = extent_root->fs_info->fs_devices->open_devices;
+		min_stripes = 2;
+	}
+	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
+		num_stripes = 2;
+		min_stripes = 2;
+	}
+	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+		num_stripes = min_t(u64, 2,
+			    extent_root->fs_info->fs_devices->open_devices);
+		if (num_stripes < 2)
+			return -ENOSPC;
+		min_stripes = 2;
+	}
+	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+		num_stripes = extent_root->fs_info->fs_devices->open_devices;
+		if (num_stripes < 4)
+			return -ENOSPC;
+		num_stripes &= ~(u32)1;
+		sub_stripes = 2;
+		min_stripes = 4;
+	}
+
+	if (type & BTRFS_BLOCK_GROUP_DATA) {
+		max_chunk_size = 10 * calc_size;
+		min_stripe_size = 64 * 1024 * 1024;
+	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+		max_chunk_size = 4 * calc_size;
+		min_stripe_size = 32 * 1024 * 1024;
+	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		calc_size = 8 * 1024 * 1024;
+		max_chunk_size = calc_size * 2;
+		min_stripe_size = 1 * 1024 * 1024;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* we don't want a chunk larger than 10% of the FS */
+	percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
+	max_chunk_size = min(percent_max, max_chunk_size);
+
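+	/*
+	 * calc_size is the amount of space allocated on each device; if
+	 * num_stripes copies would exceed max_chunk_size, shrink it and
+	 * round it down to the stripe length
+	 */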
+again:
+	if (calc_size * num_stripes > max_chunk_size) {
+		calc_size = max_chunk_size;
+		do_div(calc_size, num_stripes);
+		do_div(calc_size, stripe_len);
+		calc_size *= stripe_len;
+	}
+	/* we don't want tiny stripes */
+	calc_size = max_t(u64, min_stripe_size, calc_size);
+
+	do_div(calc_size, stripe_len);
+	calc_size *= stripe_len;
+
+	INIT_LIST_HEAD(&private_devs);
+	cur = dev_list->next;
+	index = 0;
+
+	if (type & BTRFS_BLOCK_GROUP_DUP)
+		min_free = calc_size * 2;
+	else
+		min_free = calc_size;
+
+	/*
+	 * we add 1MB because we never use the first 1MB of the device, unless
+	 * we've looped, in which case we are likely allocating the maximum
+	 * amount of space left already
+	 */
+	if (!looped)
+		min_free += 1024 * 1024;
+
+	/* build a private list of devices we will allocate from */
+	while(index < num_stripes) {
+		device = list_entry(cur, struct btrfs_device, dev_alloc_list);
+
+		if (device->total_bytes > device->bytes_used)
+			avail = device->total_bytes - device->bytes_used;
+		else
+			avail = 0;
+		cur = cur->next;
+
+		if (device->in_fs_metadata && avail >= min_free) {
+			u64 ignored_start = 0;
+			ret = find_free_dev_extent(trans, device, path,
+						   min_free,
+						   &ignored_start);
+			if (ret == 0) {
+				list_move_tail(&device->dev_alloc_list,
+					       &private_devs);
+				index++;
+				if (type & BTRFS_BLOCK_GROUP_DUP)
+					index++;
+			}
+		} else if (device->in_fs_metadata && avail > max_avail)
+			max_avail = avail;
+		if (cur == dev_list)
+			break;
+	}
+	if (index < num_stripes) {
+		list_splice(&private_devs, dev_list);
+		if (index >= min_stripes) {
+			num_stripes = index;
+			if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+				num_stripes /= sub_stripes;
+				num_stripes *= sub_stripes;
+			}
+			looped = 1;
+			goto again;
+		}
+		if (!looped && max_avail > 0) {
+			looped = 1;
+			calc_size = max_avail;
+			goto again;
+		}
+		btrfs_free_path(path);
+		return -ENOSPC;
+	}
+	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_CHUNK_ITEM_KEY;
+	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+			      &key.offset);
+	if (ret) {
+		btrfs_free_path(path);
+		return ret;
+	}
+
+	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
+	if (!chunk) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
+	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		kfree(chunk);
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+	btrfs_free_path(path);
+	path = NULL;
+
+	stripes = &chunk->stripe;
+	*num_bytes = chunk_bytes_by_type(type, calc_size,
+					 num_stripes, sub_stripes);
+
+	index = 0;
+	while(index < num_stripes) {
+		struct btrfs_stripe *stripe;
+		BUG_ON(list_empty(&private_devs));
+		cur = private_devs.next;
+		device = list_entry(cur, struct btrfs_device, dev_alloc_list);
+
+		/* loop over this device again if we're doing a dup group */
+		if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
+		    (index == num_stripes - 1))
+			list_move_tail(&device->dev_alloc_list, dev_list);
+
+		ret = btrfs_alloc_dev_extent(trans, device,
+			     info->chunk_root->root_key.objectid,
+			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
+			     calc_size, &dev_offset);
+		BUG_ON(ret);
+		device->bytes_used += calc_size;
+		ret = btrfs_update_device(trans, device);
+		BUG_ON(ret);
+
+		map->stripes[index].dev = device;
+		map->stripes[index].physical = dev_offset;
+		stripe = stripes + index;
+		btrfs_set_stack_stripe_devid(stripe, device->devid);
+		btrfs_set_stack_stripe_offset(stripe, dev_offset);
+		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
+		physical = dev_offset;
+		index++;
+	}
+	BUG_ON(!list_empty(&private_devs));
+
+	/* key was set above */
+	btrfs_set_stack_chunk_length(chunk, *num_bytes);
+	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
+	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
+	btrfs_set_stack_chunk_type(chunk, type);
+	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
+	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
+	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
+	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
+	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
+	map->sector_size = extent_root->sectorsize;
+	map->stripe_len = stripe_len;
+	map->io_align = stripe_len;
+	map->io_width = stripe_len;
+	map->type = type;
+	map->num_stripes = num_stripes;
+	map->sub_stripes = sub_stripes;
+
+	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
+				btrfs_chunk_item_size(num_stripes));
+	BUG_ON(ret);
+	*start = key.offset;
+
+	em = alloc_extent_map(GFP_NOFS);
+	if (!em)
+		return -ENOMEM;
+	em->bdev = (struct block_device *)map;
+	em->start = key.offset;
+	em->len = *num_bytes;
+	em->block_start = 0;
+
+	if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		ret = btrfs_add_system_chunk(trans, chunk_root, &key,
+				    chunk, btrfs_chunk_item_size(num_stripes));
+		BUG_ON(ret);
+	}
+	kfree(chunk);
+
+	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
+	spin_lock(&em_tree->lock);
+	ret = add_extent_mapping(em_tree, em);
+	spin_unlock(&em_tree->lock);
+	BUG_ON(ret);
+	free_extent_map(em);
+	return ret;
+}
+
+void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
+{
+	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
+}
+
+void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
+{
+	struct extent_map *em;
+
+	while(1) {
+		spin_lock(&tree->map_tree.lock);
+		em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
+		if (em)
+			remove_extent_mapping(&tree->map_tree, em);
+		spin_unlock(&tree->map_tree.lock);
+		if (!em)
+			break;
+		kfree(em->bdev);
+		/* once for us */
+		free_extent_map(em);
+		/* once for the tree */
+		free_extent_map(em);
+	}
+}
+
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
+{
+	struct extent_map *em;
+	struct map_lookup *map;
+	struct extent_map_tree *em_tree = &map_tree->map_tree;
+	int ret;
+
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, logical, len);
+	spin_unlock(&em_tree->lock);
+	BUG_ON(!em);
+
+	BUG_ON(em->start > logical || em->start + em->len < logical);
+	map = (struct map_lookup *)em->bdev;
+	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
+		ret = map->num_stripes;
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+		ret = map->sub_stripes;
+	else
+		ret = 1;
+	free_extent_map(em);
+	return ret;
+}
+
+static int find_live_mirror(struct map_lookup *map, int first, int num,
+			    int optimal)
+{
+	int i;
+	if (map->stripes[optimal].dev->bdev)
+		return optimal;
+	for (i = first; i < first + num; i++) {
+		if (map->stripes[i].dev->bdev)
+			return i;
+	}
+	/* we couldn't find one that doesn't fail.  Just return something
+	 * and the io error handling code will clean up eventually
+	 */
+	return optimal;
+}
+
+static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+			     u64 logical, u64 *length,
+			     struct btrfs_multi_bio **multi_ret,
+			     int mirror_num, struct page *unplug_page)
+{
+	struct extent_map *em;
+	struct map_lookup *map;
+	struct extent_map_tree *em_tree = &map_tree->map_tree;
+	u64 offset;
+	u64 stripe_offset;
+	u64 stripe_nr;
+	int stripes_allocated = 8;
+	int stripes_required = 1;
+	int stripe_index;
+	int i;
+	int num_stripes;
+	int max_errors = 0;
+	struct btrfs_multi_bio *multi = NULL;
+
+	if (multi_ret && !(rw & (1 << BIO_RW))) {
+		stripes_allocated = 1;
+	}
+again:
+	if (multi_ret) {
+		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+				GFP_NOFS);
+		if (!multi)
+			return -ENOMEM;
+
+		atomic_set(&multi->error, 0);
+	}
+
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, logical, *length);
+	spin_unlock(&em_tree->lock);
+
+	if (!em && unplug_page)
+		return 0;
+
+	if (!em) {
+		printk("unable to find logical %Lu len %Lu\n", logical, *length);
+		BUG();
+	}
+
+	BUG_ON(em->start > logical || em->start + em->len < logical);
+	map = (struct map_lookup *)em->bdev;
+	offset = logical - em->start;
+
+	if (mirror_num > map->num_stripes)
+		mirror_num = 0;
+
+	/* if our multi bio struct is too small, back off and try again */
+	if (rw & (1 << BIO_RW)) {
+		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+				 BTRFS_BLOCK_GROUP_DUP)) {
+			stripes_required = map->num_stripes;
+			max_errors = 1;
+		} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+			stripes_required = map->sub_stripes;
+			max_errors = 1;
+		}
+	}
+	if (multi_ret && rw == WRITE &&
+	    stripes_allocated < stripes_required) {
+		stripes_allocated = map->num_stripes;
+		free_extent_map(em);
+		kfree(multi);
+		goto again;
+	}
+	stripe_nr = offset;
+	/*
+	 * stripe_nr counts the total number of stripes we have to stride
+	 * to get to this block
+	 */
+	do_div(stripe_nr, map->stripe_len);
+
+	stripe_offset = stripe_nr * map->stripe_len;
+	BUG_ON(offset < stripe_offset);
+
+	/* stripe_offset is the offset of this block in its stripe */
+	stripe_offset = offset - stripe_offset;
+
+	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+			 BTRFS_BLOCK_GROUP_RAID10 |
+			 BTRFS_BLOCK_GROUP_DUP)) {
+		/* we limit the length of each bio to what fits in a stripe */
+		*length = min_t(u64, em->len - offset,
+			      map->stripe_len - stripe_offset);
+	} else {
+		*length = em->len - offset;
+	}
+
+	if (!multi_ret && !unplug_page)
+		goto out;
+
+	num_stripes = 1;
+	stripe_index = 0;
+	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+		if (unplug_page || (rw & (1 << BIO_RW)))
+			num_stripes = map->num_stripes;
+		else if (mirror_num)
+			stripe_index = mirror_num - 1;
+		else {
+			stripe_index = find_live_mirror(map, 0,
+					    map->num_stripes,
+					    current->pid % map->num_stripes);
+		}
+
+	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+		if (rw & (1 << BIO_RW))
+			num_stripes = map->num_stripes;
+		else if (mirror_num)
+			stripe_index = mirror_num - 1;
+
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+		int factor = map->num_stripes / map->sub_stripes;
+
+		stripe_index = do_div(stripe_nr, factor);
+		stripe_index *= map->sub_stripes;
+
+		if (unplug_page || (rw & (1 << BIO_RW)))
+			num_stripes = map->sub_stripes;
+		else if (mirror_num)
+			stripe_index += mirror_num - 1;
+		else {
+			stripe_index = find_live_mirror(map, stripe_index,
+					      map->sub_stripes, stripe_index +
+					      current->pid % map->sub_stripes);
+		}
+	} else {
+		/*
+		 * after this do_div call, stripe_nr is the number of stripes
+		 * on this device we have to walk to find the data, and
+		 * stripe_index is the number of our device in the stripe array
+		 */
+		stripe_index = do_div(stripe_nr, map->num_stripes);
+	}
+	BUG_ON(stripe_index >= map->num_stripes);
+
+	for (i = 0; i < num_stripes; i++) {
+		if (unplug_page) {
+			struct btrfs_device *device;
+			struct backing_dev_info *bdi;
+
+			device = map->stripes[stripe_index].dev;
+			if (device->bdev) {
+				bdi = blk_get_backing_dev_info(device->bdev);
+				if (bdi->unplug_io_fn) {
+					bdi->unplug_io_fn(bdi, unplug_page);
+				}
+			}
+		} else {
+			multi->stripes[i].physical =
+				map->stripes[stripe_index].physical +
+				stripe_offset + stripe_nr * map->stripe_len;
+			multi->stripes[i].dev = map->stripes[stripe_index].dev;
+		}
+		stripe_index++;
+	}
+	if (multi_ret) {
+		*multi_ret = multi;
+		multi->num_stripes = num_stripes;
+		multi->max_errors = max_errors;
+	}
+out:
+	free_extent_map(em);
+	return 0;
+}
+
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+		      u64 logical, u64 *length,
+		      struct btrfs_multi_bio **multi_ret, int mirror_num)
+{
+	return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
+				 mirror_num, NULL);
+}
+
+int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
+		      u64 logical, struct page *page)
+{
+	u64 length = PAGE_CACHE_SIZE;
+	return __btrfs_map_block(map_tree, READ, logical, &length,
+				 NULL, 0, page);
+}
+
+
+static void end_bio_multi_stripe(struct bio *bio, int err)
+{
+	struct btrfs_multi_bio *multi = bio->bi_private;
+	int is_orig_bio = 0;
+
+	if (err)
+		atomic_inc(&multi->error);
+
+	if (bio == multi->orig_bio)
+		is_orig_bio = 1;
+
+	if (atomic_dec_and_test(&multi->stripes_pending)) {
+		if (!is_orig_bio) {
+			bio_put(bio);
+			bio = multi->orig_bio;
+		}
+		bio->bi_private = multi->private;
+		bio->bi_end_io = multi->end_io;
+		/* only send an error to the higher layers if it is
+		 * beyond the tolerance of the multi-bio
+		 */
+		if (atomic_read(&multi->error) > multi->max_errors) {
+			err = -EIO;
+		} else if (err) {
+			/*
+			 * this bio is actually up to date; we didn't
+			 * go over the max number of errors
+			 */
+			set_bit(BIO_UPTODATE, &bio->bi_flags);
+			err = 0;
+		}
+		kfree(multi);
+
+		bio_endio(bio, err);
+	} else if (!is_orig_bio) {
+		bio_put(bio);
+	}
+}
+
+struct async_sched {
+	struct bio *bio;
+	int rw;
+	struct btrfs_fs_info *info;
+	struct btrfs_work work;
+};
+
+/*
+ * see run_scheduled_bios for a description of why bios are collected for
+ * async submit.
+ *
+ * This will add one bio to the pending list for a device and make sure
+ * the work struct is scheduled.
+ */
+static int noinline schedule_bio(struct btrfs_root *root,
+				 struct btrfs_device *device,
+				 int rw, struct bio *bio)
+{
+	int should_queue = 1;
+
+	/* don't bother with additional async steps for reads, right now */
+	if (!(rw & (1 << BIO_RW))) {
+		bio_get(bio);
+		submit_bio(rw, bio);
+		bio_put(bio);
+		return 0;
+	}
+
+	/*
+	 * nr_async_bios allows us to reliably return congestion to the
+	 * higher layers.  Otherwise, the async bio makes it appear we have
+	 * made progress against dirty pages when we've really just put it
+	 * on a queue for later
+	 */
+	atomic_inc(&root->fs_info->nr_async_bios);
+	WARN_ON(bio->bi_next);
+	bio->bi_next = NULL;
+	bio->bi_rw |= rw;
+
+	spin_lock(&device->io_lock);
+
+	if (device->pending_bio_tail)
+		device->pending_bio_tail->bi_next = bio;
+
+	device->pending_bio_tail = bio;
+	if (!device->pending_bios)
+		device->pending_bios = bio;
+	if (device->running_pending)
+		should_queue = 0;
+
+	spin_unlock(&device->io_lock);
+
+	if (should_queue)
+		btrfs_queue_worker(&root->fs_info->submit_workers,
+				   &device->work);
+	return 0;
+}
+
+int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+		  int mirror_num, int async_submit)
+{
+	struct btrfs_mapping_tree *map_tree;
+	struct btrfs_device *dev;
+	struct bio *first_bio = bio;
+	u64 logical = (u64)bio->bi_sector << 9;
+	u64 length = 0;
+	u64 map_length;
+	struct btrfs_multi_bio *multi = NULL;
+	int ret;
+	int dev_nr = 0;
+	int total_devs = 1;
+
+	length = bio->bi_size;
+	map_tree = &root->fs_info->mapping_tree;
+	map_length = length;
+
+	ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
+			      mirror_num);
+	BUG_ON(ret);
+
+	total_devs = multi->num_stripes;
+	if (map_length < length) {
+		printk("mapping failed logical %Lu bio len %Lu "
+		       "len %Lu\n", logical, length, map_length);
+		BUG();
+	}
+	multi->end_io = first_bio->bi_end_io;
+	multi->private = first_bio->bi_private;
+	multi->orig_bio = first_bio;
+	atomic_set(&multi->stripes_pending, multi->num_stripes);
+
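+	/*
+	 * clone the original bio for every stripe except the last, point
+	 * each one at its stripe's device and physical offset, and submit
+	 */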
+	while(dev_nr < total_devs) {
+		if (total_devs > 1) {
+			if (dev_nr < total_devs - 1) {
+				bio = bio_clone(first_bio, GFP_NOFS);
+				BUG_ON(!bio);
+			} else {
+				bio = first_bio;
+			}
+			bio->bi_private = multi;
+			bio->bi_end_io = end_bio_multi_stripe;
+		}
+		bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
+		dev = multi->stripes[dev_nr].dev;
+		if (dev && dev->bdev) {
+			bio->bi_bdev = dev->bdev;
+			if (async_submit)
+				schedule_bio(root, dev, rw, bio);
+			else
+				submit_bio(rw, bio);
+		} else {
+			bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
+			bio->bi_sector = logical >> 9;
+			bio_endio(bio, -EIO);
+		}
+		dev_nr++;
+	}
+	if (total_devs == 1)
+		kfree(multi);
+	return 0;
+}
+
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+				       u8 *uuid)
+{
+	struct list_head *head = &root->fs_info->fs_devices->devices;
+
+	return __find_device(head, devid, uuid);
+}
+
+static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
+					    u64 devid, u8 *dev_uuid)
+{
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device)
+		return NULL;
+	list_add(&device->dev_list,
+		 &fs_devices->devices);
+	list_add(&device->dev_alloc_list,
+		 &fs_devices->alloc_list);
+	device->barriers = 1;
+	device->dev_root = root->fs_info->dev_root;
+	device->devid = devid;
+	device->work.func = pending_bios_fn;
+	fs_devices->num_devices++;
+	spin_lock_init(&device->io_lock);
+	memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
+	return device;
+}
+
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+			  struct extent_buffer *leaf,
+			  struct btrfs_chunk *chunk)
+{
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct map_lookup *map;
+	struct extent_map *em;
+	u64 logical;
+	u64 length;
+	u64 devid;
+	u8 uuid[BTRFS_UUID_SIZE];
+	int num_stripes;
+	int ret;
+	int i;
+
+	logical = key->offset;
+	length = btrfs_chunk_length(leaf, chunk);
+
+	spin_lock(&map_tree->map_tree.lock);
+	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
+	spin_unlock(&map_tree->map_tree.lock);
+
+	/* already mapped? */
+	if (em && em->start <= logical && em->start + em->len > logical) {
+		free_extent_map(em);
+		return 0;
+	} else if (em) {
+		free_extent_map(em);
+	}
+
+	em = alloc_extent_map(GFP_NOFS);
+	if (!em)
+		return -ENOMEM;
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		free_extent_map(em);
+		return -ENOMEM;
+	}
+
+	em->bdev = (struct block_device *)map;
+	em->start = logical;
+	em->len = length;
+	em->block_start = 0;
+
+	map->num_stripes = num_stripes;
+	map->io_width = btrfs_chunk_io_width(leaf, chunk);
+	map->io_align = btrfs_chunk_io_align(leaf, chunk);
+	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+	map->type = btrfs_chunk_type(leaf, chunk);
+	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
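+
+	/*
+	 * resolve each stripe to a btrfs_device; missing devices are only
+	 * tolerated when the filesystem is mounted degraded
+	 */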
+	for (i = 0; i < num_stripes; i++) {
+		map->stripes[i].physical =
+			btrfs_stripe_offset_nr(leaf, chunk, i);
+		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+		read_extent_buffer(leaf, uuid, (unsigned long)
+				   btrfs_stripe_dev_uuid_nr(chunk, i),
+				   BTRFS_UUID_SIZE);
+		map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
+
+		if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
+			kfree(map);
+			free_extent_map(em);
+			return -EIO;
+		}
+		if (!map->stripes[i].dev) {
+			map->stripes[i].dev =
+				add_missing_dev(root, devid, uuid);
+			if (!map->stripes[i].dev) {
+				kfree(map);
+				free_extent_map(em);
+				return -EIO;
+			}
+		}
+		map->stripes[i].dev->in_fs_metadata = 1;
+	}
+
+	spin_lock(&map_tree->map_tree.lock);
+	ret = add_extent_mapping(&map_tree->map_tree, em);
+	spin_unlock(&map_tree->map_tree.lock);
+	BUG_ON(ret);
+	free_extent_map(em);
+
+	return 0;
+}
+
+static int fill_device_from_item(struct extent_buffer *leaf,
+				 struct btrfs_dev_item *dev_item,
+				 struct btrfs_device *device)
+{
+	unsigned long ptr;
+
+	device->devid = btrfs_device_id(leaf, dev_item);
+	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+	device->type = btrfs_device_type(leaf, dev_item);
+	device->io_align = btrfs_device_io_align(leaf, dev_item);
+	device->io_width = btrfs_device_io_width(leaf, dev_item);
+	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+
+	ptr = (unsigned long)btrfs_device_uuid(dev_item);
+	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
+
+	return 0;
+}
+
+static int read_one_dev(struct btrfs_root *root,
+			struct extent_buffer *leaf,
+			struct btrfs_dev_item *dev_item)
+{
+	struct btrfs_device *device;
+	u64 devid;
+	int ret;
+	u8 dev_uuid[BTRFS_UUID_SIZE];
+
+	devid = btrfs_device_id(leaf, dev_item);
+	read_extent_buffer(leaf, dev_uuid,
+			   (unsigned long)btrfs_device_uuid(dev_item),
+			   BTRFS_UUID_SIZE);
+	device = btrfs_find_device(root, devid, dev_uuid);
+	if (!device) {
+		printk("warning devid %Lu missing\n", devid);
+		device = add_missing_dev(root, devid, dev_uuid);
+		if (!device)
+			return -ENOMEM;
+	}
+
+	fill_device_from_item(leaf, dev_item, device);
+	device->dev_root = root->fs_info->dev_root;
+	device->in_fs_metadata = 1;
+	ret = 0;
+#if 0
+	ret = btrfs_open_device(device);
+	if (ret) {
+		kfree(device);
+	}
+#endif
+	return ret;
+}
+
+int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
+{
+	struct btrfs_dev_item *dev_item;
+
+	dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
+						     dev_item);
+	return read_one_dev(root, buf, dev_item);
+}
+
+int btrfs_read_sys_array(struct btrfs_root *root)
+{
+	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+	struct extent_buffer *sb;
+	struct btrfs_disk_key *disk_key;
+	struct btrfs_chunk *chunk;
+	u8 *ptr;
+	unsigned long sb_ptr;
+	int ret = 0;
+	u32 num_stripes;
+	u32 array_size;
+	u32 len = 0;
+	u32 cur;
+	struct btrfs_key key;
+
+	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
+					  BTRFS_SUPER_INFO_SIZE);
+	if (!sb)
+		return -ENOMEM;
+	btrfs_set_buffer_uptodate(sb);
+	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
+	array_size = btrfs_super_sys_array_size(super_copy);
+
+	ptr = super_copy->sys_chunk_array;
+	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
+	cur = 0;
+
+	while (cur < array_size) {
+		disk_key = (struct btrfs_disk_key *)ptr;
+		btrfs_disk_key_to_cpu(&key, disk_key);
+
+		len = sizeof(*disk_key);
+		ptr += len;
+		sb_ptr += len;
+		cur += len;
+
+		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+			chunk = (struct btrfs_chunk *)sb_ptr;
+			ret = read_one_chunk(root, &key, sb, chunk);
+			if (ret)
+				break;
+			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+			len = btrfs_chunk_item_size(num_stripes);
+		} else {
+			ret = -EIO;
+			break;
+		}
+		ptr += len;
+		sb_ptr += len;
+		cur += len;
+	}
+	free_extent_buffer(sb);
+	return ret;
+}
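
The loop in btrfs_read_sys_array() walks a packed byte array in which every record is a disk key immediately followed by a variable-sized chunk item, advancing a cursor by the size of each piece. Here is a standalone userspace sketch of that cursor walk; the demo_* structures and the key-type constant are simplified placeholders, not the real btrfs on-disk layout.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_CHUNK_ITEM_KEY 1	/* placeholder key type */

struct demo_disk_key {
	uint64_t objectid;
	uint8_t  type;
	uint64_t offset;
} __attribute__((packed));

struct demo_stripe {
	uint64_t devid;
	uint64_t physical;
} __attribute__((packed));

struct demo_chunk {
	uint64_t length;
	uint16_t num_stripes;
	struct demo_stripe stripes[];
} __attribute__((packed));

static size_t demo_chunk_item_size(uint16_t num_stripes)
{
	return sizeof(struct demo_chunk) + num_stripes * sizeof(struct demo_stripe);
}

/* walk (key, chunk) records packed back to back, as the sys array loop does */
static int demo_walk_sys_array(const uint8_t *array, size_t array_size)
{
	size_t cur = 0;

	while (cur < array_size) {
		struct demo_disk_key key;
		uint16_t num_stripes;

		memcpy(&key, array + cur, sizeof(key));
		cur += sizeof(key);

		if (key.type != DEMO_CHUNK_ITEM_KEY)
			return -1;	/* only chunk items are allowed here */

		memcpy(&num_stripes,
		       array + cur + offsetof(struct demo_chunk, num_stripes),
		       sizeof(num_stripes));
		printf("chunk at offset %llu with %u stripes\n",
		       (unsigned long long)key.offset, num_stripes);
		cur += demo_chunk_item_size(num_stripes);
	}
	return 0;
}
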
+
+int btrfs_read_chunk_tree(struct btrfs_root *root)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	int ret;
+	int slot;
+
+	root = root->fs_info->chunk_root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* first we search for all of the device items, and then we
+	 * read in all of the chunk items.  This way we can create chunk
+	 * mappings that reference all of the devices that are found
+	 */
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.offset = 0;
+	key.type = 0;
+again:
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	while(1) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto error;
+			break;
+		}
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+		if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
+			if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
+				break;
+			if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+				struct btrfs_dev_item *dev_item;
+				dev_item = btrfs_item_ptr(leaf, slot,
+						  struct btrfs_dev_item);
+				ret = read_one_dev(root, leaf, dev_item);
+				BUG_ON(ret);
+			}
+		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
+			struct btrfs_chunk *chunk;
+			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+			ret = read_one_chunk(root, &found_key, leaf, chunk);
+		}
+		path->slots[0]++;
+	}
+	if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
+		key.objectid = 0;
+		btrfs_release_path(root, path);
+		goto again;
+	}
+
+	ret = 0;
+error:
+	btrfs_free_path(path);
+	return ret;
+}

+ 150 - 0
fs/btrfs/volumes.h

@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_VOLUMES_
+#define __BTRFS_VOLUMES_
+
+#include <linux/bio.h>
+#include "async-thread.h"
+
+struct buffer_head;
+struct btrfs_device {
+	struct list_head dev_list;
+	struct list_head dev_alloc_list;
+	struct btrfs_root *dev_root;
+	struct buffer_head *pending_io;
+	struct bio *pending_bios;
+	struct bio *pending_bio_tail;
+	int running_pending;
+	u64 generation;
+
+	int barriers;
+	int in_fs_metadata;
+
+	spinlock_t io_lock;
+
+	struct block_device *bdev;
+
+	char *name;
+
+	/* the internal btrfs device id */
+	u64 devid;
+
+	/* size of the device */
+	u64 total_bytes;
+
+	/* bytes used */
+	u64 bytes_used;
+
+	/* optimal io alignment for this device */
+	u32 io_align;
+
+	/* optimal io width for this device */
+	u32 io_width;
+
+	/* minimal io size for this device */
+	u32 sector_size;
+
+	/* type and info about this device */
+	u64 type;
+
+	/* physical drive uuid (or lvm uuid) */
+	u8 uuid[BTRFS_UUID_SIZE];
+
+	struct btrfs_work work;
+};
+
+struct btrfs_fs_devices {
+	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+
+	/* the device with this id has the most recent copy of the super */
+	u64 latest_devid;
+	u64 latest_trans;
+	u64 num_devices;
+	u64 open_devices;
+	struct block_device *latest_bdev;
+	/* all of the devices in the FS */
+	struct list_head devices;
+
+	/* devices not currently being allocated */
+	struct list_head alloc_list;
+	struct list_head list;
+	int mounted;
+};
+
+struct btrfs_bio_stripe {
+	struct btrfs_device *dev;
+	u64 physical;
+};
+
+struct btrfs_multi_bio {
+	atomic_t stripes_pending;
+	bio_end_io_t *end_io;
+	struct bio *orig_bio;
+	void *private;
+	atomic_t error;
+	int max_errors;
+	int num_stripes;
+	struct btrfs_bio_stripe stripes[];
+};
+
+#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
+			    (sizeof(struct btrfs_bio_stripe) * (n)))
+
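
btrfs_multi_bio_size() computes a single allocation that covers the fixed header plus a caller-chosen number of trailing stripes, the usual flexible-array-member pattern. A minimal userspace sketch of that sizing idiom follows; the demo_* names are illustrative only.

#include <stdlib.h>

struct demo_stripe {
	unsigned long long physical;
};

struct demo_multi_bio {
	int num_stripes;
	struct demo_stripe stripes[];	/* flexible array member */
};

#define demo_multi_bio_size(n) (sizeof(struct demo_multi_bio) + \
				(sizeof(struct demo_stripe) * (n)))

/* one allocation holds the header and all of its stripes */
static struct demo_multi_bio *demo_alloc_multi(int num_stripes)
{
	struct demo_multi_bio *multi;

	multi = calloc(1, demo_multi_bio_size(num_stripes));
	if (!multi)
		return NULL;
	multi->num_stripes = num_stripes;
	return multi;
}
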
+int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+			   struct btrfs_device *device,
+			   u64 chunk_tree, u64 chunk_objectid,
+			   u64 chunk_offset,
+			   u64 num_bytes, u64 *start);
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+		    u64 logical, u64 *length,
+		    struct btrfs_multi_bio **multi_ret, int mirror_num);
+int btrfs_read_sys_array(struct btrfs_root *root);
+int btrfs_read_chunk_tree(struct btrfs_root *root);
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *extent_root, u64 *start,
+		      u64 *num_bytes, u64 type);
+void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
+void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
+int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+		  int mirror_num, int async_submit);
+int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+		       int flags, void *holder);
+int btrfs_scan_one_device(const char *path, int flags, void *holder,
+			  struct btrfs_fs_devices **fs_devices_ret);
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_add_device(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_device *device);
+int btrfs_rm_device(struct btrfs_root *root, char *device_path);
+int btrfs_cleanup_fs_uuids(void);
+int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
+int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
+		      u64 logical, struct page *page);
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size);
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+				       u8 *uuid);
+int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
+int btrfs_init_new_device(struct btrfs_root *root, char *path);
+int btrfs_balance(struct btrfs_root *dev_root);
+void btrfs_unlock_volumes(void);
+void btrfs_lock_volumes(void);
+#endif

+ 321 - 0
fs/btrfs/xattr.c

@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2007 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/xattr.h>
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "transaction.h"
+#include "xattr.h"
+#include "disk-io.h"
+
+
+ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
+				void *buffer, size_t size)
+{
+	struct btrfs_dir_item *di;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	int ret = 0;
+	unsigned long data_ptr;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* lookup the xattr by name */
+	di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name,
+				strlen(name), 0);
+	if (!di || IS_ERR(di)) {
+		ret = -ENODATA;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	/* if size is 0, that means we want the size of the attr */
+	if (!size) {
+		ret = btrfs_dir_data_len(leaf, di);
+		goto out;
+	}
+
+	/* now get the data out of our dir_item */
+	if (btrfs_dir_data_len(leaf, di) > size) {
+		ret = -ERANGE;
+		goto out;
+	}
+	data_ptr = (unsigned long)((char *)(di + 1) +
+				   btrfs_dir_name_len(leaf, di));
+	read_extent_buffer(leaf, buffer, data_ptr,
+			   btrfs_dir_data_len(leaf, di));
+	ret = btrfs_dir_data_len(leaf, di);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
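
The size == 0 branch above is what lets callers probe for the attribute length before fetching it, which is how getxattr(2) is normally used from user space. A small sketch of that probe-then-fetch pattern, assuming a Linux system; the path and attribute name are whatever the caller supplies.

#include <sys/types.h>
#include <stdlib.h>
#include <sys/xattr.h>

/* probe the length first, then fetch the value into a right-sized buffer */
static char *demo_read_xattr(const char *path, const char *name, ssize_t *len_out)
{
	ssize_t len = getxattr(path, name, NULL, 0);
	char *buf;

	if (len < 0)
		return NULL;
	buf = malloc(len + 1);
	if (!buf)
		return NULL;
	len = getxattr(path, name, buf, len);
	if (len < 0) {
		free(buf);
		return NULL;
	}
	buf[len] = '\0';
	*len_out = len;
	return buf;
}
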
+
+int __btrfs_setxattr(struct inode *inode, const char *name,
+			    const void *value, size_t size, int flags)
+{
+	struct btrfs_dir_item *di;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path;
+	int ret = 0, mod = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+
+	/* first let's see if we already have this xattr */
+	di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
+				strlen(name), -1);
+	if (IS_ERR(di)) {
+		ret = PTR_ERR(di);
+		goto out;
+	}
+
+	/* ok, we already have this xattr, let's remove it */
+	if (di) {
+		/* if the caller asked for create-only, bail out */
+		if (flags & XATTR_CREATE) {
+			ret = -EEXIST;
+			goto out;
+		}
+
+		ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		if (ret)
+			goto out;
+		btrfs_release_path(root, path);
+
+		/* if we don't have a value then we are removing the xattr */
+		if (!value) {
+			mod = 1;
+			goto out;
+		}
+	} else {
+		btrfs_release_path(root, path);
+
+		if (flags & XATTR_REPLACE) {
+			/* we couldn't find the attr to replace */
+			ret = -ENODATA;
+			goto out;
+		}
+	}
+
+	/* ok we have to create a completely new xattr */
+	ret = btrfs_insert_xattr_item(trans, root, name, strlen(name),
+				      value, size, inode->i_ino);
+	if (ret)
+		goto out;
+	mod = 1;
+
+out:
+	if (mod) {
+		inode->i_ctime = CURRENT_TIME;
+		ret = btrfs_update_inode(trans, root, inode);
+	}
+
+	btrfs_end_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
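
The XATTR_CREATE and XATTR_REPLACE branches above map directly onto the flags a caller passes to setxattr(2): create-only fails with EEXIST if the attribute already exists, replace-only fails with ENODATA if it does not. A brief userspace sketch; "user.comment" is just an example attribute name.

#include <string.h>
#include <sys/xattr.h>

/* create-only: -1/EEXIST if "user.comment" is already set on path */
static int demo_add_comment(const char *path, const char *text)
{
	return setxattr(path, "user.comment", text, strlen(text), XATTR_CREATE);
}

/* replace-only: -1/ENODATA if "user.comment" is not set yet */
static int demo_update_comment(const char *path, const char *text)
{
	return setxattr(path, "user.comment", text, strlen(text), XATTR_REPLACE);
}
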
+
+ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct btrfs_key key, found_key;
+	struct inode *inode = dentry->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_dir_item *di;
+	int ret = 0, slot, advance;
+	size_t total_size = 0, size_left = size;
+	unsigned long name_ptr;
+	size_t name_len;
+	u32 nritems;
+
+	/*
+	 * ok we want all objects associated with this id.
+	 * NOTE: we set key.offset = 0; because we want to start with the
+	 * first xattr that we find and walk forward
+	 */
+	key.objectid = inode->i_ino;
+	btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = 2;
+
+	/* search for our xattrs */
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto err;
+	ret = 0;
+	advance = 0;
+	while (1) {
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		slot = path->slots[0];
+
+		/* this is where we start walking through the path */
+		if (advance || slot >= nritems) {
+			/*
+			 * if we've reached the last slot in this leaf we need
+			 * to go to the next leaf and reset everything
+			 */
+			if (slot >= nritems-1) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret)
+					break;
+				leaf = path->nodes[0];
+				nritems = btrfs_header_nritems(leaf);
+				slot = path->slots[0];
+			} else {
+				/*
+				 * just walking through the slots on this leaf
+				 */
+				slot++;
+				path->slots[0]++;
+			}
+		}
+		advance = 1;
+
+		item = btrfs_item_nr(leaf, slot);
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		/* check to make sure this item is what we want */
+		if (found_key.objectid != key.objectid)
+			break;
+		if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY)
+			break;
+
+		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+
+		name_len = btrfs_dir_name_len(leaf, di);
+		total_size += name_len + 1;
+
+		/* we are just looking for how big our buffer needs to be */
+		if (!size)
+			continue;
+
+		if (!buffer || (name_len + 1) > size_left) {
+			ret = -ERANGE;
+			break;
+		}
+
+		name_ptr = (unsigned long)(di + 1);
+		read_extent_buffer(leaf, buffer, name_ptr, name_len);
+		buffer[name_len] = '\0';
+
+		size_left -= name_len + 1;
+		buffer += name_len + 1;
+	}
+	ret = total_size;
+
+err:
+	btrfs_free_path(path);
+
+	return ret;
+}
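
The buffer that btrfs_listxattr() fills is a run of NUL-terminated names packed back to back, and the total_size return value lets callers size the buffer with an initial probing call. A userspace sketch of consuming that format through listxattr(2), assuming a Linux system.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

/* print every attribute name on path; names are NUL-terminated, back to back */
static int demo_print_xattr_names(const char *path)
{
	ssize_t len = listxattr(path, NULL, 0);	/* size probe */
	char *buf, *p;

	if (len <= 0)
		return (int)len;
	buf = malloc(len);
	if (!buf)
		return -1;
	len = listxattr(path, buf, len);
	if (len < 0) {
		free(buf);
		return -1;
	}
	for (p = buf; p < buf + len; p += strlen(p) + 1)
		puts(p);
	free(buf);
	return 0;
}
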
+
+/*
+ * List of handlers for synthetic system.* attributes.  All real on-disk
+ * attributes are handled directly.
+ */
+struct xattr_handler *btrfs_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&btrfs_xattr_acl_access_handler,
+	&btrfs_xattr_acl_default_handler,
+#endif
+	NULL,
+};
+
+/*
+ * Check if the attribute is in a supported namespace.
+ *
+ * This is applied after the check for the synthetic attributes in the system
+ * namespace.
+ */
+static bool btrfs_is_valid_xattr(const char *name)
+{
+	return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
+	       !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
+	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
+	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+}
+
+ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size)
+{
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_getxattr(dentry, name, buffer, size);
+
+	if (!btrfs_is_valid_xattr(name))
+		return -EOPNOTSUPP;
+	return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
+}
+
+int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags)
+{
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_setxattr(dentry, name, value, size, flags);
+
+	if (!btrfs_is_valid_xattr(name))
+		return -EOPNOTSUPP;
+
+	if (size == 0)
+		value = "";  /* empty EA, do not remove */
+	return __btrfs_setxattr(dentry->d_inode, name, value, size, flags);
+}
+
+int btrfs_removexattr(struct dentry *dentry, const char *name)
+{
+	/*
+	 * If this is a request for a synthetic attribute in the system.*
+	 * namespace use the generic infrastructure to resolve a handler
+	 * for it via sb->s_xattr.
+	 */
+	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		return generic_removexattr(dentry, name);
+
+	if (!btrfs_is_valid_xattr(name))
+		return -EOPNOTSUPP;
+	return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+}

+ 39 - 0
fs/btrfs/xattr.h

@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2007 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __XATTR__
+#define __XATTR__
+
+#include <linux/xattr.h>
+
+extern struct xattr_handler btrfs_xattr_acl_access_handler;
+extern struct xattr_handler btrfs_xattr_acl_default_handler;
+extern struct xattr_handler *btrfs_xattr_handlers[];
+
+extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
+		void *buffer, size_t size);
+extern int __btrfs_setxattr(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags);
+
+extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size);
+extern int btrfs_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags);
+extern int btrfs_removexattr(struct dentry *dentry, const char *name);
+
+#endif /* __XATTR__ */