Merge branch 'master' into feature/crypto

Change-Id: I680e986a2be9df72e07a94fb9c2b94d6f68c42a2
This commit is contained in:
Alistair Coles 2015-04-21 14:42:48 +01:00
commit 5bb7c286eb
124 changed files with 19872 additions and 3770 deletions

View File

@ -51,7 +51,7 @@ Tom Fifield <tom@openstack.org> Tom Fifield <fifieldt@unimelb.edu.au>
Sascha Peilicke <saschpe@gmx.de> Sascha Peilicke <saschpe@suse.de>
Zhenguo Niu <zhenguo@unitedstack.com> <Niu.ZGlinux@gmail.com>
Peter Portante <peter.portante@redhat.com> <peter.a.portante@gmail.com>
Christian Schwede <info@cschwede.de> <christian.schwede@enovance.com>
Christian Schwede <christian.schwede@enovance.com> <info@cschwede.de>
Constantine Peresypkin <constantine.peresypk@rackspace.com> <constantine@litestack.com>
Madhuri Kumari <madhuri.rai07@gmail.com> madhuri <madhuri@madhuri-VirtualBox.(none)>
Morgan Fainberg <morgan.fainberg@gmail.com> <m@metacloud.com>
@ -70,3 +70,5 @@ Jing Liuqing <jing.liuqing@99cloud.net> <jing.liuqing@99cloud.net>
Lorcan Browne <lorcan.browne@hp.com> <lorcan.browne@hp.com>
Eohyung Lee <liquidnuker@gmail.com> <liquid@kt.com>
Harshit Chitalia <harshit@acelio.com> <harshit@acelio.com>
Richard Hawkins <richard.hawkins@rackspace.com>
Sarvesh Ranjan <saranjan@cisco.com>

15
AUTHORS
View File

@ -29,6 +29,7 @@ Mehdi Abaakouk (mehdi.abaakouk@enovance.com)
Jesse Andrews (anotherjesse@gmail.com)
Joe Arnold (joe@swiftstack.com)
Ionuț Arțăriși (iartarisi@suse.cz)
Bob Ball (bob.ball@citrix.com)
Christian Berendt (berendt@b1-systems.de)
Luis de Bethencourt (luis@debethencourt.com)
Keshava Bharadwaj (kb.sankethi@gmail.com)
@ -60,10 +61,13 @@ Cedric Dos Santos (cedric.dos.sant@gmail.com)
Gerry Drudy (gerry.drudy@hp.com)
Morgan Fainberg (morgan.fainberg@gmail.com)
ZhiQiang Fan (aji.zqfan@gmail.com)
Mike Fedosin (mfedosin@mirantis.com)
Ricardo Ferreira (ricardo.sff@gmail.com)
Flaper Fesp (flaper87@gmail.com)
Tom Fifield (tom@openstack.org)
Florent Flament (florent.flament-ext@cloudwatt.com)
Gaurav B. Gangalwar (gaurav@gluster.com)
Jiangmiao Gao (tolbkni@gmail.com)
Alex Gaynor (alex.gaynor@gmail.com)
Martin Geisler (martin@geisler.net)
Anne Gentle (anne@openstack.org)
@ -71,12 +75,13 @@ Clay Gerrard (clay.gerrard@gmail.com)
Filippo Giunchedi (fgiunchedi@wikimedia.org)
Mark Gius (launchpad@markgius.com)
David Goetz (david.goetz@rackspace.com)
Tushar Gohad (tushar.gohad@intel.com)
Jonathan Gonzalez V (jonathan.abdiel@gmail.com)
Joe Gordon (jogo@cloudscaling.com)
David Hadas (davidh@il.ibm.com)
Andrew Hale (andy@wwwdata.eu)
Soren Hansen (soren@linux2go.dk)
Richard (Rick) Hawkins (richard.hawkins@rackspace.com)
Richard Hawkins (richard.hawkins@rackspace.com)
Gregory Haynes (greg@greghaynes.net)
Doug Hellmann (doug.hellmann@dreamhost.com)
Dan Hersam (dan.hersam@hp.com)
@ -94,6 +99,7 @@ Paul Jimenez (pj@place.org)
Zhang Jinnan (ben.os@99cloud.net)
Jason Johnson (jajohnson@softlayer.com)
Brian K. Jones (bkjones@gmail.com)
Arnaud JOST (arnaud.jost@ovh.net)
Kiyoung Jung (kiyoung.jung@kt.com)
Takashi Kajinami (kajinamit@nttdata.co.jp)
Matt Kassawara (mkassawara@gmail.com)
@ -104,6 +110,7 @@ Dae S. Kim (dae@velatum.com)
Nathan Kinder (nkinder@redhat.com)
Eugene Kirpichov (ekirpichov@gmail.com)
Leah Klearman (lklrmn@gmail.com)
Martin Kletzander (mkletzan@redhat.com)
Steve Kowalik (steven@wedontsleep.org)
Sergey Kraynev (skraynev@mirantis.com)
Sushil Kumar (sushil.kumar2@globallogic.com)
@ -155,6 +162,7 @@ Constantine Peresypkin (constantine.peresypk@rackspace.com)
Dieter Plaetinck (dieter@vimeo.com)
Dan Prince (dprince@redhat.com)
Felipe Reyes (freyes@tty.cl)
Janie Richling (jrichli@us.ibm.com)
Matt Riedemann (mriedem@us.ibm.com)
Li Riqiang (lrqrun@gmail.com)
Rafael Rivero (rafael@cloudscaling.com)
@ -163,10 +171,11 @@ Aaron Rosen (arosen@nicira.com)
Brent Roskos (broskos@internap.com)
Shilla Saebi (shilla.saebi@gmail.com)
Cristian A Sanchez (cristian.a.sanchez@intel.com)
saranjan (saranjan@cisco.com)
Christian Schwede (info@cschwede.de)
Sarvesh Ranjan (saranjan@cisco.com)
Christian Schwede (christian.schwede@enovance.com)
Mark Seger (Mark.Seger@hp.com)
Andrew Clay Shafer (acs@parvuscaptus.com)
Mitsuhiro SHIGEMATSU (shigematsu.mitsuhiro@lab.ntt.co.jp)
Dhriti Shikhar (dhrish20@gmail.com)
Chuck Short (chuck.short@canonical.com)
Michael Shuler (mshuler@gmail.com)

View File

@ -1,3 +1,63 @@
swift (2.3.0)
* Erasure Code support (beta)
Swift now supports an erasure-code (EC) storage policy type. This allows
deployers to achieve very high durability with less raw capacity than is used
in replicated storage. However, EC requires more CPU and network
resources, so it is not good for every use case. EC is great for storing
large, infrequently accessed data in a single region.
Swift's implementation of erasure codes is meant to be transparent to
end users. There is no API difference between replicated storage and
EC storage.
To support erasure codes, Swift now depends on PyECLib and
liberasurecode. liberasurecode is a pluggable library that allows for
the actual EC algorithm to be implemented in a library of your choosing.
As a beta release, EC support is nearly feature complete, but it
is lacking support for some features (like multi-range reads) and has
not had a full performance characterization. This feature relies on
ssync for durability. Deployers are urged to do extensive testing and
not deploy production data using an erasure code storage policy.
Full docs are at http://swift.openstack.org/overview_erasure_code.html
* Add support for container TempURL Keys.
* Make more memcache options configurable. connection_timeout,
pool_timeout, tries, and io_timeout are all now configurable.
* Swift now supports composite tokens. This allows another service to
act on behalf of a user, but only with that user's consent.
See http://swift.openstack.org/overview_auth.html for more details.
* Multi-region replication was improved. When replicating data to a
different region, only one replica will be pushed per replication
cycle. This gives the remote region a chance to replicate the data
locally instead of pushing more data over the inter-region network.
* Internal requests from the ratelimit middleware now properly log a
swift_source. See http://swift.openstack.org/logs.html for details.
* Improved storage policy support for quarantine stats in swift-recon.
* The proxy log line now includes the request's storage policy index.
* A ring checker has been added to swift-recon to validate if rings are
built correctly. As part of this feature, storage servers have learned
the OPTIONS verb.
* Add support of x-remove- headers for container-sync.
* Rings now support hostnames instead of just IP addresses.
* Swift now enforces that the API version on a request is valid. Valid
versions are configured via the valid_api_versions setting in swift.conf
* Various other minor bug fixes and improvements.
swift (2.2.2)
* Data placement changes

View File

@ -11,6 +11,29 @@ we won't be able to respond to pull requests submitted through GitHub.
Bugs should be filed [on Launchpad](https://bugs.launchpad.net/swift),
not in GitHub's issue tracker.
Swift Design Principles
=======================
* [The Zen of Python](http://legacy.python.org/dev/peps/pep-0020/)
* Simple Scales
* Minimal dependencies
* Re-use existing tools and libraries when reasonable
* Leverage the economies of scale
* Small, loosely coupled RESTful services
* No single points of failure
* Start with the use case
* ... then design from the cluster operator up
* If you haven't argued about it, you don't have the right answer yet :)
* If it is your first implementation, you probably aren't done yet :)
Please don't feel offended by difference of opinion. Be prepared to advocate
for your change and iterate on it based on feedback. Reach out to other people
working on the project on
[IRC](http://eavesdrop.openstack.org/irclogs/%23openstack-swift/) or the
[mailing list](http://lists.openstack.org/pipermail/openstack-dev/) - we want
to help.
Recommended workflow
====================

View File

@ -176,6 +176,7 @@ if __name__ == '__main__':
if not devices:
logger.error("Error: No devices found!")
recon_errors = {}
total_errors = 0
for device in devices:
recon_errors[device['mount_point']] = 0
errors = get_errors(error_re, log_file_pattern, minutes, logger)
@ -198,8 +199,10 @@ if __name__ == '__main__':
comment_fstab(mount_point)
unmounts += 1
recon_errors[mount_point] = count
total_errors += count
recon_file = recon_cache_path + "/drive.recon"
dump_recon_cache(recon_errors, recon_file, logger)
dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger)
if unmounts == 0:
logger.info("No drives were unmounted")

31
bin/swift-object-reconstructor Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from swift.obj.reconstructor import ObjectReconstructor
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon
from optparse import OptionParser
if __name__ == '__main__':
parser = OptionParser("%prog CONFIG [options]")
parser.add_option('-d', '--devices',
help='Reconstruct only given devices. '
'Comma-separated list')
parser.add_option('-p', '--partitions',
help='Reconstruct only given partitions. '
'Comma-separated list')
conf_file, options = parse_options(parser=parser, once=True)
run_daemon(ObjectReconstructor, conf_file, **options)

View File

@ -270,6 +270,10 @@ If you need to use an HTTP Proxy, set it here; defaults to no proxy.
Will audit, at most, each container once per interval. The default is 300 seconds.
.IP \fBcontainer_time\fR
Maximum amount of time to spend syncing each container per pass. The default is 60 seconds.
.IP \fBrequest_retries\fR
Server errors from requests will be retried by default.
.IP \fBinternal_client_conf_path\fR
Internal client config file path.
.RE
.PD

View File

@ -62,16 +62,32 @@ Get replication stats
Check cluster for unmounted devices
.IP "\fB-d, --diskusage\fR"
Get disk usage stats
.IP "\fB--top=COUNT\fR"
Also show the top COUNT entries in rank order
.IP "\fB--lowest=COUNT\fR"
Also show the lowest COUNT entries in rank order
.IP "\fB--human-readable\fR"
Use human readable suffix for disk usage stats
.IP "\fB-l, --loadstats\fR"
Get cluster load average stats
.IP "\fB-q, --quarantined\fR"
Get cluster quarantine stats
.IP "\fB--validate-servers\fR"
Validate servers on the ring
.IP "\fB--md5\fR"
Get md5sum of servers ring and compare to local cop
Get md5sum of servers ring and compare to local copy
.IP "\fB--sockstat\fR"
Get cluster socket usage stats
.IP "\fB--driveaudit\fR"
Get drive audit error stats
.IP "\fB--all\fR"
Perform all checks. Equivalent to \-arudlq \-\-md5
.IP "\fB--region=REGION\fR"
Only query servers in specified region
.IP "\fB-z ZONE, --zone=ZONE\fR"
Only query servers in specified zone
.IP "\fB-t SECONDS, --timeout=SECONDS\fR"
Time to wait for a response from a server
.IP "\fB--swiftdir=PATH\fR"
Default = /etc/swift
.PD

View File

@ -16,6 +16,16 @@ swift-ring-builder object-1.builder add r1z2-127.0.0.1:6020/sdb2 1
swift-ring-builder object-1.builder add r1z3-127.0.0.1:6030/sdb3 1
swift-ring-builder object-1.builder add r1z4-127.0.0.1:6040/sdb4 1
swift-ring-builder object-1.builder rebalance
swift-ring-builder object-2.builder create 10 6 1
swift-ring-builder object-2.builder add r1z1-127.0.0.1:6010/sdb1 1
swift-ring-builder object-2.builder add r1z1-127.0.0.1:6010/sdb5 1
swift-ring-builder object-2.builder add r1z2-127.0.0.1:6020/sdb2 1
swift-ring-builder object-2.builder add r1z2-127.0.0.1:6020/sdb6 1
swift-ring-builder object-2.builder add r1z3-127.0.0.1:6030/sdb3 1
swift-ring-builder object-2.builder add r1z3-127.0.0.1:6030/sdb7 1
swift-ring-builder object-2.builder add r1z4-127.0.0.1:6040/sdb4 1
swift-ring-builder object-2.builder add r1z4-127.0.0.1:6040/sdb8 1
swift-ring-builder object-2.builder rebalance
swift-ring-builder container.builder create 10 3 1
swift-ring-builder container.builder add r1z1-127.0.0.1:6011/sdb1 1
swift-ring-builder container.builder add r1z2-127.0.0.1:6021/sdb2 1

View File

@ -9,7 +9,10 @@ sudo mkfs.xfs -f ${SAIO_BLOCK_DEVICE:-/dev/sdb1}
sudo mount /mnt/sdb1
sudo mkdir /mnt/sdb1/1 /mnt/sdb1/2 /mnt/sdb1/3 /mnt/sdb1/4
sudo chown ${USER}:${USER} /mnt/sdb1/*
mkdir -p /srv/1/node/sdb1 /srv/2/node/sdb2 /srv/3/node/sdb3 /srv/4/node/sdb4
mkdir -p /srv/1/node/sdb1 /srv/1/node/sdb5 \
/srv/2/node/sdb2 /srv/2/node/sdb6 \
/srv/3/node/sdb3 /srv/3/node/sdb7 \
/srv/4/node/sdb4 /srv/4/node/sdb8
sudo rm -f /var/log/debug /var/log/messages /var/log/rsyncd.log /var/log/syslog
find /var/cache/swift* -type f -name *.recon -exec rm -f {} \;
# On Fedora use "systemctl restart <service>"

View File

@ -22,6 +22,8 @@ use = egg:swift#recon
[object-replicator]
vm_test_mode = yes
[object-reconstructor]
[object-updater]
[object-auditor]

View File

@ -22,6 +22,8 @@ use = egg:swift#recon
[object-replicator]
vm_test_mode = yes
[object-reconstructor]
[object-updater]
[object-auditor]

View File

@ -22,6 +22,8 @@ use = egg:swift#recon
[object-replicator]
vm_test_mode = yes
[object-reconstructor]
[object-updater]
[object-auditor]

View File

@ -22,6 +22,8 @@ use = egg:swift#recon
[object-replicator]
vm_test_mode = yes
[object-reconstructor]
[object-updater]
[object-auditor]

View File

@ -5,7 +5,16 @@ swift_hash_path_suffix = changeme
[storage-policy:0]
name = gold
policy_type = replication
default = yes
[storage-policy:1]
name = silver
policy_type = replication
[storage-policy:2]
name = ec42
policy_type = erasure_coding
ec_type = jerasure_rs_vand
ec_num_data_fragments = 4
ec_num_parity_fragments = 2

View File

@ -88,6 +88,16 @@ attempting to write to or read the builder/ring files while operations are in
progress. This can be useful in environments where ring management has been
automated but the operator still needs to interact with the rings manually.
If the ring builder is not producing the balances that you are
expecting, you can gain visibility into what it's doing with the
``--debug`` flag.::
swift-ring-builder <builder-file> rebalance --debug
This produces a great deal of output that is mostly useful if you are
either (a) attempting to fix the ring builder, or (b) filing a bug
against the ring builder.
-----------------------
Scripting Ring Creation
-----------------------

View File

@ -104,5 +104,7 @@ Other
* `Swiftsync <https://github.com/stackforge/swiftsync>`_ - A massive syncer between two swift clusters.
* `Django Swiftbrowser <https://github.com/cschwede/django-swiftbrowser>`_ - Simple Django web app to access Openstack Swift.
* `Swift-account-stats <https://github.com/enovance/swift-account-stats>`_ - Swift-account-stats is a tool to report statistics on Swift usage at tenant and global levels.
* `PyECLib <https://bitbucket.org/kmgreen2/pyeclib>`_ - High Level Erasure Code library used by Swift
* `liberasurecode <http://www.bytebucket.org/tsg-/liberasurecode>`_ - Low Level Erasure Code library used by PyECLib
* `Swift Browser <https://github.com/zerovm/swift-browser>`_ - JavaScript interface for Swift
* `swift-ui <https://github.com/fanatic/swift-ui>`_ - OpenStack Swift web browser

View File

@ -70,6 +70,35 @@ When using the 'in-process test' mode, the optional in-memory
object server may be selected by setting the environment variable
``SWIFT_TEST_IN_MEMORY_OBJ`` to a true value.
The 'in-process test' mode searches for ``proxy-server.conf`` and
``swift.conf`` config files from which it copies config options and overrides
some options to suit in process testing. The search will first look for config
files in a ``<custom_conf_source_dir>`` that may optionally be specified using
the environment variable::
SWIFT_TEST_IN_PROCESS_CONF_DIR=<custom_conf_source_dir>
If ``SWIFT_TEST_IN_PROCESS_CONF_DIR`` is not set, or if a config file is not
found in ``<custom_conf_source_dir>``, the search will then look in the
``etc/`` directory in the source tree. If the config file is still not found,
the corresponding sample config file from ``etc/`` is used (e.g.
``proxy-server.conf-sample`` or ``swift.conf-sample``).
The environment variable ``SWIFT_TEST_POLICY`` may be set to specify
a particular storage policy *name* that will be used for testing. When set,
this policy must exist in the ``swift.conf`` file and its corresponding ring
file must exist in ``<custom_conf_source_dir>`` (if specified) or ``etc/``. The
test setup will set the specified policy to be the default and use its ring
file properties for constructing the test object ring. This allows in-process
testing to be run against various policy types and ring files.
For example, this command would run the in-process mode functional tests
using config files found in ``$HOME/my_tests`` and policy 'silver'::
SWIFT_TEST_IN_PROCESS=1 SWIFT_TEST_IN_PROCESS_CONF_DIR=$HOME/my_tests \
SWIFT_TEST_POLICY=silver tox -e func
------------
Coding Style
------------

View File

@ -87,8 +87,11 @@ another device when creating the VM, and follow these instructions:
sudo chown ${USER}:${USER} /mnt/sdb1/*
sudo mkdir /srv
for x in {1..4}; do sudo ln -s /mnt/sdb1/$x /srv/$x; done
sudo mkdir -p /srv/1/node/sdb1 /srv/2/node/sdb2 /srv/3/node/sdb3 \
/srv/4/node/sdb4 /var/run/swift
sudo mkdir -p /srv/1/node/sdb1 /srv/1/node/sdb5 \
/srv/2/node/sdb2 /srv/2/node/sdb6 \
/srv/3/node/sdb3 /srv/3/node/sdb7 \
/srv/4/node/sdb4 /srv/4/node/sdb8 \
/var/run/swift
sudo chown -R ${USER}:${USER} /var/run/swift
# **Make sure to include the trailing slash after /srv/$x/**
for x in {1..4}; do sudo chown -R ${USER}:${USER} /srv/$x/; done
@ -124,7 +127,11 @@ these instructions:
sudo mkdir /mnt/sdb1/1 /mnt/sdb1/2 /mnt/sdb1/3 /mnt/sdb1/4
sudo chown ${USER}:${USER} /mnt/sdb1/*
for x in {1..4}; do sudo ln -s /mnt/sdb1/$x /srv/$x; done
sudo mkdir -p /srv/1/node/sdb1 /srv/2/node/sdb2 /srv/3/node/sdb3 /srv/4/node/sdb4 /var/run/swift
sudo mkdir -p /srv/1/node/sdb1 /srv/1/node/sdb5 \
/srv/2/node/sdb2 /srv/2/node/sdb6 \
/srv/3/node/sdb3 /srv/3/node/sdb7 \
/srv/4/node/sdb4 /srv/4/node/sdb8 \
/var/run/swift
sudo chown -R ${USER}:${USER} /var/run/swift
# **Make sure to include the trailing slash after /srv/$x/**
for x in {1..4}; do sudo chown -R ${USER}:${USER} /srv/$x/; done
@ -402,7 +409,7 @@ Setting up scripts for running Swift
#. Copy the SAIO scripts for resetting the environment::
cd $HOME/swift/doc; cp -r saio/bin $HOME/bin; cd -
cd $HOME/swift/doc; cp saio/bin/* $HOME/bin; cd -
chmod +x $HOME/bin/*
#. Edit the ``$HOME/bin/resetswift`` script
@ -455,30 +462,41 @@ Setting up scripts for running Swift
.. literalinclude:: /../saio/bin/remakerings
You can expect the output from this command to produce the following (note
that 2 object rings are created in order to test storage policies in the
SAIO environment however they map to the same nodes)::
You can expect the output from this command to produce the following. Note
that 3 object rings are created in order to test storage policies and EC in
the SAIO environment. The EC ring is the only one with all 8 devices.
There are also two replication rings, one for 3x replication and another
for 2x replication, but those rings only use 4 devices::
Device d0r1z1-127.0.0.1:6010R127.0.0.1:6010/sdb1_"" with 1.0 weight got id 0
Device d1r1z2-127.0.0.1:6020R127.0.0.1:6020/sdb2_"" with 1.0 weight got id 1
Device d2r1z3-127.0.0.1:6030R127.0.0.1:6030/sdb3_"" with 1.0 weight got id 2
Device d3r1z4-127.0.0.1:6040R127.0.0.1:6040/sdb4_"" with 1.0 weight got id 3
Reassigned 1024 (100.00%) partitions. Balance is now 0.00.
Reassigned 1024 (100.00%) partitions. Balance is now 0.00. Dispersion is now 0.00
Device d0r1z1-127.0.0.1:6010R127.0.0.1:6010/sdb1_"" with 1.0 weight got id 0
Device d1r1z2-127.0.0.1:6020R127.0.0.1:6020/sdb2_"" with 1.0 weight got id 1
Device d2r1z3-127.0.0.1:6030R127.0.0.1:6030/sdb3_"" with 1.0 weight got id 2
Device d3r1z4-127.0.0.1:6040R127.0.0.1:6040/sdb4_"" with 1.0 weight got id 3
Reassigned 1024 (100.00%) partitions. Balance is now 0.00.
Reassigned 1024 (100.00%) partitions. Balance is now 0.00. Dispersion is now 0.00
Device d0r1z1-127.0.0.1:6010R127.0.0.1:6010/sdb1_"" with 1.0 weight got id 0
Device d1r1z1-127.0.0.1:6010R127.0.0.1:6010/sdb5_"" with 1.0 weight got id 1
Device d2r1z2-127.0.0.1:6020R127.0.0.1:6020/sdb2_"" with 1.0 weight got id 2
Device d3r1z2-127.0.0.1:6020R127.0.0.1:6020/sdb6_"" with 1.0 weight got id 3
Device d4r1z3-127.0.0.1:6030R127.0.0.1:6030/sdb3_"" with 1.0 weight got id 4
Device d5r1z3-127.0.0.1:6030R127.0.0.1:6030/sdb7_"" with 1.0 weight got id 5
Device d6r1z4-127.0.0.1:6040R127.0.0.1:6040/sdb4_"" with 1.0 weight got id 6
Device d7r1z4-127.0.0.1:6040R127.0.0.1:6040/sdb8_"" with 1.0 weight got id 7
Reassigned 1024 (100.00%) partitions. Balance is now 0.00. Dispersion is now 0.00
Device d0r1z1-127.0.0.1:6011R127.0.0.1:6011/sdb1_"" with 1.0 weight got id 0
Device d1r1z2-127.0.0.1:6021R127.0.0.1:6021/sdb2_"" with 1.0 weight got id 1
Device d2r1z3-127.0.0.1:6031R127.0.0.1:6031/sdb3_"" with 1.0 weight got id 2
Device d3r1z4-127.0.0.1:6041R127.0.0.1:6041/sdb4_"" with 1.0 weight got id 3
Reassigned 1024 (100.00%) partitions. Balance is now 0.00.
Reassigned 1024 (100.00%) partitions. Balance is now 0.00. Dispersion is now 0.00
Device d0r1z1-127.0.0.1:6012R127.0.0.1:6012/sdb1_"" with 1.0 weight got id 0
Device d1r1z2-127.0.0.1:6022R127.0.0.1:6022/sdb2_"" with 1.0 weight got id 1
Device d2r1z3-127.0.0.1:6032R127.0.0.1:6032/sdb3_"" with 1.0 weight got id 2
Device d3r1z4-127.0.0.1:6042R127.0.0.1:6042/sdb4_"" with 1.0 weight got id 3
Reassigned 1024 (100.00%) partitions. Balance is now 0.00.
Reassigned 1024 (100.00%) partitions. Balance is now 0.00. Dispersion is now 0.00
#. Read more about Storage Policies and your SAIO :doc:`policies_saio`

View File

@ -2,7 +2,7 @@
Instructions for a Multiple Server Swift Installation
=====================================================
Please refer to the latest offical
Please refer to the latest official
`Openstack Installation Guides <http://docs.openstack.org/#install-guides>`_
for the most up-to-date documentation.

BIN
doc/source/images/ec_overview.png Executable file

Binary file not shown (145 KiB).

View File

@ -56,6 +56,7 @@ Overview and Concepts
overview_expiring_objects
cors
crossdomain
overview_erasure_code
overview_backing_store
associated_projects

View File

@ -11,7 +11,10 @@ Proxy Server
The Proxy Server is responsible for tying together the rest of the Swift
architecture. For each request, it will look up the location of the account,
container, or object in the ring (see below) and route the request accordingly.
The public API is also exposed through the Proxy Server.
For Erasure Code type policies, the Proxy Server is also responsible for
encoding and decoding object data. See :doc:`overview_erasure_code` for
complete information on Erasure Code support. The public API is also exposed
through the Proxy Server.
A large number of failures are also handled in the Proxy Server. For
example, if a server is unavailable for an object PUT, it will ask the
@ -87,7 +90,8 @@ implementing a particular differentiation.
For example, one might have the default policy with 3x replication, and create
a second policy which, when applied to new containers only uses 2x replication.
Another might add SSDs to a set of storage nodes and create a performance tier
storage policy for certain containers to have their objects stored there.
storage policy for certain containers to have their objects stored there. Yet
another might be the use of Erasure Coding to define a cold-storage tier.
This mapping is then exposed on a per-container basis, where each container
can be assigned a specific storage policy when it is created, which remains in
@ -156,6 +160,15 @@ item (object, container, or account) is deleted, a tombstone is set as the
latest version of the item. The replicator will see the tombstone and ensure
that the item is removed from the entire system.
--------------
Reconstruction
--------------
The reconstructor is used by Erasure Code policies and is analogous to the
replicator for Replication type policies. See :doc:`overview_erasure_code`
for complete information on both Erasure Code support and the
reconstructor.
--------
Updaters
--------

View File

@ -0,0 +1,672 @@
====================
Erasure Code Support
====================
--------------------------
Beta: Not production ready
--------------------------
The erasure code support in Swift is considered "beta" at this point.
Most major functionality is included, but it has not been tested or validated
at large scale. This feature relies on ssync for durability. Deployers are
urged to do extensive testing and not deploy production data using an
erasure code storage policy.
If any bugs are found during testing, please report them to
https://bugs.launchpad.net/swift
-------------------------------
History and Theory of Operation
-------------------------------
There's a lot of good material out there on Erasure Code (EC) theory; this short
introduction is just meant to provide some basic context to help the reader
better understand the implementation in Swift.
Erasure Coding for storage applications grew out of Coding Theory as far back as
the 1960s with the Reed-Solomon codes. These codes have been used for years in
applications ranging from CDs to DVDs to general communications and, yes, even
in the space program starting with Voyager! The basic idea is that some amount
of data is broken up into smaller pieces called fragments and coded in such a
way that it can be transmitted with the ability to tolerate the loss of some
number of the coded fragments. That's where the word "erasure" comes in: if you
transmit 14 fragments and only 13 are received then one of them is said to be
"erased". The word "erasure" provides an important distinction with EC; it
isn't about detecting errors, it's about dealing with failures. Another
important element of EC is that the number of erasures that can be tolerated can
be adjusted to meet the needs of the application.
At a high level EC works by using a specific scheme to break up a single data
buffer into several smaller data buffers and then, depending on the scheme,
performing some encoding operation on that data in order to generate additional
information. So you end up with more data than you started with and that extra
data is often called "parity". Note that there are many, many different
encoding techniques that vary both in how they organize and manipulate the data
as well by what means they use to calculate parity. For example, one scheme
might rely on `Galois Field Arithmetic <http://www.ssrc.ucsc.edu/Papers/plank-fast13.pdf>`_
while others may work with only XOR. The number of variations and
details about their differences are well beyond the scope of this introduction,
but we will talk more about a few of them when we get into the implementation of
EC in Swift.
--------------------------------
Overview of EC Support in Swift
--------------------------------
First and foremost, from an application perspective EC support is totally
transparent. There is no EC-related external API; a container is simply created
using a Storage Policy defined to use EC and then interaction with the cluster
is the same as any other durability policy.
EC is implemented in Swift as a Storage Policy, see :doc:`overview_policies` for
complete details on Storage Policies. Because support is implemented as a
Storage Policy, all of the storage devices associated with your cluster's EC
capability can be isolated. It is entirely possible to share devices between
storage policies, but for EC it may make more sense to not only use separate
devices but possibly even entire nodes dedicated for EC.
Which direction one chooses depends on why the EC policy is being deployed. If,
for example, there is a production replication policy in place already and the
goal is to add a cold storage tier such that the existing nodes performing
replication are impacted as little as possible, adding a new set of nodes
dedicated to EC might make the most sense but also incurs the most cost. On the
other hand, if EC is being added as a capability to provide additional
durability for a specific set of applications and the existing infrastructure is
well suited for EC (sufficient number of nodes, zones for the EC scheme that is
chosen) then leveraging the existing infrastructure such that the EC ring shares
nodes with the replication ring makes the most sense. These are some of the
main considerations:
* Layout of existing infrastructure.
* Cost of adding dedicated EC nodes (or just dedicated EC devices).
* Intended usage model(s).
The Swift code base does not include any of the algorithms necessary to perform
the actual encoding and decoding of data; that is left to external libraries.
The Storage Policies architecture is leveraged to enable EC on a per container
basis -- the object rings are still used to determine the placement of EC data
fragments. Although there are several code paths that are unique to an operation
associated with an EC policy, an external dependency to an Erasure Code library
is what Swift counts on to perform the low level EC functions. The use of an
external library allows for maximum flexibility as there are a significant
number of options out there, each with its own pros and cons that can vary
greatly from one use case to another.
---------------------------------------
PyECLib: External Erasure Code Library
---------------------------------------
PyECLib is a Python Erasure Coding Library originally designed and written as
part of the effort to add EC support to the Swift project; however, it is an
independent project. The library provides a well-defined and simple Python
interface and internally implements a plug-in architecture allowing it to take
advantage of many well-known C libraries such as:
* Jerasure and GFComplete at http://jerasure.org.
* Intel(R) ISA-L at http://01.org/intel%C2%AE-storage-acceleration-library-open-source-version.
* Or write your own!
PyECLib uses a C based library called liberasurecode to implement the plug-in
infrastructure; liberasurecode is available at:
* liberasurecode: https://bitbucket.org/tsg-/liberasurecode
PyECLib itself therefore allows for not only choice but further extensibility as
well. PyECLib also comes with a handy utility to help determine the best
algorithm to use based on the equipment that will be used (processors and server
configurations may vary in performance per algorithm). More on this will be
covered in the configuration section. PyECLib is included as a Swift
requirement.
For complete details see `PyECLib <https://bitbucket.org/kmgreen2/pyeclib>`_
------------------------------
Storing and Retrieving Objects
------------------------------
We will discuss the details of how PUT and GET work in the "Under the Hood"
section later on. The key point here is that all of the erasure code work goes
on behind the scenes; this summary is a high level information overview only.
The PUT flow looks like this:
#. The proxy server streams in an object and buffers up "a segment" of data
(size is configurable).
#. The proxy server calls on PyECLib to encode the data into smaller fragments.
#. The proxy streams the encoded fragments out to the storage nodes based on
ring locations.
#. Repeat until the client is done sending data.
#. The client is notified of completion when a quorum is met.
The GET flow looks like this:
#. The proxy server makes simultaneous requests to participating nodes.
#. As soon as the proxy has the fragments it needs, it calls on PyECLib to
decode the data.
#. The proxy streams the decoded data it has back to the client.
#. Repeat until the proxy is done sending data back to the client.
It may sound like, from this high level overview, that using EC is going to
cause an explosion in the number of actual files stored in each node's local
file system. Although it is true that more files will be stored (because an
object is broken into pieces), the implementation works to minimize this where
possible; more details are available in the Under the Hood section.
-------------
Handoff Nodes
-------------
In EC policies, similarly to replication, handoff nodes are a set of storage
nodes used to augment the list of primary nodes responsible for storing an
erasure coded object. These handoff nodes are used in the event that one or more
of the primaries are unavailable. Handoff nodes are still selected with an
attempt to achieve maximum separation of the data being placed.
--------------
Reconstruction
--------------
For an EC policy, reconstruction is analogous to the process of replication for
a replication type policy -- essentially "the reconstructor" replaces "the
replicator" for EC policy types. The basic framework of reconstruction is very
similar to that of replication with a few notable exceptions:
* Because EC does not actually replicate partitions, it needs to operate at a
finer granularity than what is provided with rsync; therefore EC leverages
much of ssync behind the scenes (you do not need to manually configure ssync).
* Once a pair of nodes has determined the need to replace a missing object
fragment, instead of pushing over a copy like replication would do, the
reconstructor has to read in enough surviving fragments from other nodes and
perform a local reconstruction before it has the correct data to push to the
other node.
* A reconstructor does not talk to all other reconstructors in the set of nodes
responsible for an EC partition; this would be far too chatty. Instead, each
reconstructor is responsible for sync'ing with the partition's closest two
neighbors (closest meaning left and right on the ring).
.. note::
EC work (encode and decode) takes place both on the proxy nodes, for PUT/GET
operations, as well as on the storage nodes for reconstruction. As with
replication, reconstruction can be the result of rebalancing, bit-rot, drive
failure or reverting data from a hand-off node back to its primary.
--------------------------
Performance Considerations
--------------------------
Efforts are underway to characterize performance of various Erasure Code
schemes. One of the main goals of the beta release is to perform this
characterization and encourage others to do so and provide meaningful feedback
to the development community. There are many factors that will affect
performance of EC so it is vital that we have multiple characterization
activities happening.
In general, EC has different performance characteristics than replicated data.
EC requires substantially more CPU to read and write data, and is more suited
for larger objects that are not frequently accessed (e.g. backups).
----------------------------
Using an Erasure Code Policy
----------------------------
To use an EC policy, the administrator simply needs to define an EC policy in
`swift.conf` and create/configure the associated object ring. An example of how
an EC policy can be setup is shown below::
[storage-policy:2]
name = ec104
policy_type = erasure_coding
ec_type = jerasure_rs_vand
ec_num_data_fragments = 10
ec_num_parity_fragments = 4
ec_object_segment_size = 1048576
Let's take a closer look at each configuration parameter:
* ``name``: This is a standard storage policy parameter.
See :doc:`overview_policies` for details.
* ``policy_type``: Set this to ``erasure_coding`` to indicate that this is an EC
policy.
* ``ec_type``: Set this value according to the available options in the selected
PyECLib back-end. This specifies the EC scheme that is to be used. For
example the option shown here selects Vandermonde Reed-Solomon encoding while
an option of ``flat_xor_hd_3`` would select Flat-XOR based HD combination
codes. See the `PyECLib <https://bitbucket.org/kmgreen2/pyeclib>`_ page for
full details.
* ``ec_num_data_fragments``: The total number of fragments that will be
comprised of data.
* ``ec_num_parity_fragments``: The total number of fragments that will be
comprised of parity.
* ``ec_object_segment_size``: The amount of data that will be buffered up before
feeding a segment into the encoder/decoder. The default value is 1048576.
When PyECLib encodes an object, it will break it into N fragments. However, what
is important during configuration is how many of those are data and how many
are parity. So in the example above, PyECLib will actually break an object into
14 different fragments: 10 of them will be made up of actual object data and 4
of them will be made of parity data (calculations depending on ec_type).
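As an illustrative aside (not part of the Swift code base), the same 10+4 split
can be exercised directly with PyECLib; the driver parameters below mirror the
policy options shown above, and the ``ECDriver`` interface is assumed from
PyECLib's own documentation::

    # Sketch only: encode one segment with a 10+4 Reed-Solomon scheme and show
    # that any 10 of the resulting 14 fragments are enough to decode it again.
    from pyeclib.ec_iface import ECDriver

    driver = ECDriver(k=10, m=4, ec_type='jerasure_rs_vand')
    segment = b'some object data' * 65536

    fragments = driver.encode(segment)      # 14 fragments: 10 data + 4 parity
    assert len(fragments) == 14

    # Drop 4 fragments; the remaining 10 still decode to the original segment.
    decoded = driver.decode(fragments[4:])
    assert decoded == segment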
When deciding which devices to use in the EC policy's object ring, be sure to
carefully consider the performance impacts. Running some performance
benchmarking in a test environment for your configuration is highly recommended
before deployment. Once you have configured your EC policy in `swift.conf` and
created your object ring, your application is ready to start using EC simply by
creating a container with the specified policy name and interacting as usual.
.. note::
It's important to note that once you have deployed a policy and have created
objects with that policy, these configuration options cannot be changed. If
a change in the configuration is desired, you must create a new policy
and migrate the data to a new container.
Migrating Between Policies
--------------------------
A common usage of EC is to migrate less commonly accessed data from a more
expensive but lower latency policy such as replication. When an application
determines that it wants to move data from a replication policy to an EC policy,
it simply needs to move the data from the replicated container to an EC
container that was created with the target durability policy.
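As a hedged illustration of such a migration (this is not part of Swift
itself), an application using python-swiftclient might do something along the
following lines; the auth URL, container names and the ``ec42`` policy are
hypothetical::

    # Sketch: copy objects from a replicated container into a new EC container.
    from swiftclient import client as swift_client

    conn = swift_client.Connection(authurl='http://saio:8080/auth/v1.0',
                                   user='test:tester', key='testing')

    # Create the destination container with the EC policy.
    conn.put_container('cold-archive', headers={'X-Storage-Policy': 'ec42'})

    # Copy each object over, then (optionally) delete the replicated copy.
    _, listing = conn.get_container('hot-data')
    for entry in listing:
        _, body = conn.get_object('hot-data', entry['name'])
        conn.put_object('cold-archive', entry['name'], contents=body)
        conn.delete_object('hot-data', entry['name'])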
Region Support
--------------
For at least the initial version of EC, it is not recommended that an EC scheme
span beyond a single region; neither performance nor functional validation has
been done in such a configuration.
--------------
Under the Hood
--------------
Now that we've explained a little about EC support in Swift and how to
configure/use it, let's explore how EC fits in at the nuts-n-bolts level.
Terminology
-----------
The term 'fragment' has been used already to describe the output of the EC
process (a series of fragments); however, we need to define some other key terms
here before going any deeper. Without paying special attention to using the
correct terms consistently, it is very easy to get confused in a hurry!
* **chunk**: HTTP chunks received over wire (term not used to describe any EC
specific operation).
* **segment**: Not to be confused with SLO/DLO use of the word, in EC we call a
segment a series of consecutive HTTP chunks buffered up before performing an
EC operation.
* **fragment**: Data and parity 'fragments' are generated when erasure coding
transformation is applied to a segment.
* **EC archive**: A concatenation of EC fragments; to a storage node this looks
like an object.
* **ec_ndata**: Number of EC data fragments.
* **ec_nparity**: Number of EC parity fragments.
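To tie the terms above together, here is a rough back-of-the-envelope sketch
(illustrative only, with assumed numbers) of how many segments, fragments and
EC archives a single object produces under a 10+4 scheme with the default
segment size::

    import math

    ec_ndata, ec_nparity = 10, 4
    segment_size = 1048576                    # bytes buffered per encode call
    object_size = 100 * 1048576               # a hypothetical 100 MiB object

    num_segments = math.ceil(object_size / segment_size)          # 100
    fragments_per_segment = ec_ndata + ec_nparity                  # 14

    # Fragments sharing the same index are appended to the same on-disk file,
    # so the object ends up as 14 EC archives spread across storage nodes,
    # each archive holding one fragment per segment.
    num_ec_archives = fragments_per_segment                        # 14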
Middleware
----------
Middleware remains unchanged. For most middleware (e.g., SLO/DLO) the fact that
the proxy is fragmenting incoming objects is transparent. For list endpoints,
however, it is a bit different. A caller of list endpoints will get back the
locations of all of the fragments. The caller will be unable to re-assemble the
original object with this information; however, the node locations may still
prove to be useful information for some applications.
On Disk Storage
---------------
EC archives are stored on disk in their respective objects-N directory based on
their policy index. See :doc:`overview_policies` for details on per policy
directory information.
The actual names on disk of EC archives also have one additional piece of data
encoded in the filename, the fragment archive index.
Each storage policy now must include a transformation function that diskfile
will use to build the filename to store on disk. The functions are implemented
in the diskfile module as policy-specific subclasses of ``DiskFileManager``.
This is required for a few reasons. For one, it allows us to store fragment
archives of different indexes on the same storage node, which is not typical
but is possible in many circumstances. Without unique filenames for the
different EC archive files in a set, we would be at risk of overwriting one
archive of index n with another of index m in some scenarios.
The transformation function for the replication policy is simply a NOP. For
reconstruction, the index is appended to the filename just before the .data
extension. An example filename for a fragment archive storing the 5th fragment
would look like this::
1418673556.92690#5.data
An additional file is also included for Erasure Code policies called the
``.durable`` file. Its meaning will be covered in detail later, however, its on-
disk format does not require the name transformation function that was just
covered. The .durable for the example above would simply look like this::
1418673556.92690.durable
And it would be found alongside every fragment specific .data file following a
100% successful PUT operation.
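A minimal sketch of the naming convention just described (the real logic lives
in the EC DiskFile classes, so treat this only as an illustration)::

    def ec_data_filename(timestamp, frag_index):
        # e.g. ec_data_filename('1418673556.92690', 5) -> '1418673556.92690#5.data'
        return '%s#%d.data' % (timestamp, frag_index)

    def durable_filename(timestamp):
        # e.g. durable_filename('1418673556.92690') -> '1418673556.92690.durable'
        return '%s.durable' % timestamp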
Proxy Server
------------
High Level
==========
The Proxy Server handles Erasure Coding in a different manner than replication,
therefore there are several code paths unique to EC policies, either through
subclassing or simple conditionals. Taking a closer look at the PUT and the GET
paths will help make this clearer. But first, a high level overview of how an
object flows through the system:
.. image:: images/ec_overview.png
Note how:
* Incoming objects are buffered into segments at the proxy.
* Segments are erasure coded into fragments at the proxy.
* The proxy stripes fragments across participating nodes such that the on-disk
stored files, which we call fragment archives, are appended with each new
fragment.
This scheme makes it possible to minimize the number of on-disk files given our
segmenting and fragmenting.
Multi-Phase Conversation
========================
Multi-part MIME document support is used to allow the proxy to engage in a
handshake conversation with the storage node for processing PUT requests. This
is required for a few different reasons.
#. From the perspective of the storage node, a fragment archive is really just
another object, so we need a mechanism to send down the original object etag
after all fragment archives have landed.
#. Without introducing strong consistency semantics, the proxy needs a mechanism
to know when a quorum of fragment archives have actually made it to disk
before it can inform the client of a successful PUT.
MIME supports a conversation between the proxy and the storage nodes for every
PUT. This provides us with the ability to handle a PUT in one connection and
assure that we have the essence of a 2 phase commit, basically having the proxy
communicate back to the storage nodes once it has confirmation that all fragment
archives in the set have been committed. Note that we still require a quorum of
data elements of the conversation to complete before signaling status to the
client but we can relax that requirement for the commit phase such that only 2
confirmations to that phase of the conversation are required for success as the
reconstructor will assure propagation of markers that indicate data durability.
This provides the storage node with a cheap indicator of the last known durable
set of fragment archives for a given object on a successful durable PUT; this is
known as the ``.durable`` file. The presence of a ``.durable`` file means, to
the object server, `there is a set of ts.data files that are durable at
timestamp ts.` Note that the completion of the commit phase of the conversation
is also a signal for the object server to go ahead and immediately delete older
timestamp files for this object. This is critical as we do not want to delete
the older object until the storage node has confirmation from the proxy, via the
multi-phase conversation, that the other nodes have landed enough for a quorum.
The basic flow looks like this:
* The Proxy Server erasure codes and streams the object fragments
(ec_ndata + ec_nparity) to the storage nodes.
* The storage nodes store objects as EC archives and upon finishing object
data/metadata write, send a 1st-phase response to proxy.
* Upon quorum of storage nodes responses, the proxy initiates 2nd-phase by
sending commit confirmations to object servers.
* Upon receipt of commit message, object servers store a 0-byte data file as
`<timestamp>.durable` indicating successful PUT, and send a final response to
the proxy server.
* The proxy waits for a minimum of two object servers to respond with a
success (2xx) status before responding to the client with a successful
status. In this particular case it was decided that two responses were
the minimum needed to know that the file would be propagated in case of
failure from other nodes and because a greater number would potentially
mean more latency, which should be avoided if possible.
Here is a high level example of what the conversation looks like::
proxy: PUT /p/a/c/o
Transfer-Encoding': 'chunked'
Expect': '100-continue'
X-Backend-Obj-Multiphase-Commit: yes
obj: 100 Continue
X-Obj-Multiphase-Commit: yes
proxy: --MIMEboundary
X-Document: object body
<obj_data>
--MIMEboundary
X-Document: object metadata
Content-MD5: <footer_meta_cksum>
<footer_meta>
--MIMEboundary
<object server writes data, metadata>
obj: 100 Continue
<quorum>
proxy: X-Document: put commit
commit_confirmation
--MIMEboundary--
<object server writes ts.durable state>
obj: 20x
<proxy waits to receive >=2 2xx responses>
proxy: 2xx -> client
A few key points on the .durable file:
* The .durable file means "the matching .data file for this has sufficient
fragment archives somewhere, committed, to reconstruct the object".
* The Proxy Server will never have knowledge, either on GET or HEAD, of the
existence of a .data file on an object server if it does not have a matching
.durable file.
* The object server will never return a .data that does not have a matching
.durable.
* When a proxy does a GET, it will only receive fragment archives that have
enough present somewhere to be reconstructed.
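To make the rules above concrete, here is a simplified, assumption-laden sketch
of the check an object server effectively performs; the real implementation is
in the EC DiskFile code, not this helper::

    import os

    def servable_data_files(object_dir):
        """Return .data files that have a matching .durable marker."""
        files = os.listdir(object_dir)
        durable_ts = {f[:-len('.durable')] for f in files
                      if f.endswith('.durable')}
        servable = []
        for f in files:
            if f.endswith('.data'):
                # EC .data names look like <timestamp>#<frag_index>.data
                ts = f.split('#')[0].replace('.data', '')
                if ts in durable_ts:
                    servable.append(f)
        return servable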
Partial PUT Failures
====================
A partial PUT failure has a few different modes. In one scenario the Proxy
Server is alive through the entire PUT conversation. This is a very
straightforward case. The client will receive a good response if and only if a
quorum of fragment archives were successfully landed on their storage nodes. In
this case the Reconstructor will discover the missing fragment archives, perform
a reconstruction and deliver fragment archives and their matching .durable files
to the nodes.
The more interesting case is what happens if the proxy dies in the middle of a
conversation. If it turns out that a quorum had been met and the commit phase
of the conversation finished, it's as simple as the previous case in that the
reconstructor will repair things. However, if the commit didn't get a chance to
happen then some number of the storage nodes have .data files on them (fragment
archives) but none of them knows whether there are enough elsewhere for the
entire object to be reconstructed. In this case the client will not have
received a 2xx response so there is no issue there, however, it is left to the
storage nodes to clean up the stale fragment archives. Work is ongoing in this
area to enable the proxy to play a role in reviving these fragment archives,
however, for the current release, a proxy failure after the start of a
conversation but before the commit message will simply result in a PUT failure.
GET
===
The GET for EC is different enough from replication that subclassing the
`BaseObjectController` to the `ECObjectController` enables an efficient way to
implement the high level steps described earlier:
#. The proxy server makes simultaneous requests to participating nodes.
#. As soon as the proxy has the fragments it needs, it calls on PyECLib to
decode the data.
#. The proxy streams the decoded data it has back to the client.
#. Repeat until the proxy is done sending data back to the client.
The GET path will attempt to contact all nodes participating in the EC scheme;
if not enough primaries respond then handoffs will be contacted just as with
replication. Etag and content length headers are updated for the client
response following reconstruction, as the individual fragment archive's metadata
is valid only for that fragment archive.
Object Server
-------------
The Object Server, like the Proxy Server, supports MIME conversations as
described in the proxy section earlier. This includes processing of the commit
message and decoding various sections of the MIME document to extract the footer
which includes things like the entire object etag.
DiskFile
========
Erasure code uses subclassed ``ECDiskFile``, ``ECDiskFileWriter`` and
``ECDiskFileManager`` to implement EC-specific handling of on-disk files. This
includes things like file name manipulation to include the fragment index in the
filename, determination of valid .data files based on .durable presence,
construction of EC specific hashes.pkl file to include fragment index
information, etc., etc.
Metadata
--------
There are a few different categories of metadata associated with EC:
System Metadata: EC has a set of object level system metadata that it
attaches to each of the EC archives. The metadata is for internal use only:
* ``X-Object-Sysmeta-EC-Etag``: The Etag of the original object.
* ``X-Object-Sysmeta-EC-Content-Length``: The content length of the original
object.
* ``X-Object-Sysmeta-EC-Frag-Index``: The fragment index for the object.
* ``X-Object-Sysmeta-EC-Scheme``: Description of the EC policy used to encode
the object.
* ``X-Object-Sysmeta-EC-Segment-Size``: The segment size used for the object.
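For illustration only, the per-archive system metadata described above might
look like the following (all header values here are hypothetical, and the
scheme string format is an assumption)::

    ec_sysmeta = {
        'X-Object-Sysmeta-EC-Etag': 'd41d8cd98f00b204e9800998ecf8427e',
        'X-Object-Sysmeta-EC-Content-Length': '104857600',
        'X-Object-Sysmeta-EC-Frag-Index': '5',
        'X-Object-Sysmeta-EC-Scheme': 'jerasure_rs_vand 10+4',   # assumed format
        'X-Object-Sysmeta-EC-Segment-Size': '1048576',
    }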
User Metadata: User metadata is unaffected by EC, however, a full copy of the
user metadata is stored with every EC archive. This is required as the
reconstructor needs this information and each reconstructor only communicates
with its closest neighbors on the ring.
PyECLib Metadata: PyECLib stores a small amount of metadata on a per fragment
basis. This metadata is not documented here as it is opaque to Swift.
Database Updates
----------------
As account and container rings are not associated with a Storage Policy, there
is no change to how these database updates occur when using an EC policy.
The Reconstructor
-----------------
The Reconstructor performs analogous functions to the replicator:
#. Recovery from disk drive failure.
#. Moving data around because of a rebalance.
#. Reverting data back to a primary from a handoff.
#. Recovering fragment archives from bit rot discovered by the auditor.
However, under the hood it operates quite differently. The following are some
of the key elements in understanding how the reconstructor operates.
Unlike the replicator, the work that the reconstructor does is not always as
easy to break down into the 2 basic tasks of synchronize or revert (move data
from handoff back to primary) because of the fact that one storage node can
house fragment archives of various indexes and each index really "belongs" to
a different node. So, whereas when the replicator is reverting data from a
handoff it has just one node to send its data to, the reconstructor can have
several. Additionally, it's not always the case that the processing of a
particular suffix directory means one or the other for the entire directory (as
it does for replication). The scenarios that create these mixed situations can
be pretty complex so we will just focus on what the reconstructor does here and
not a detailed explanation of why.
Job Construction and Processing
===============================
Because of the nature of the work it has to do as described above, the
reconstructor builds jobs for a single job processor. The job itself contains
all of the information needed for the processor to execute the job which may be
a synchronization or a data reversion and there may be a mix of jobs that
perform both of these operations on the same suffix directory.
Jobs are constructed on a per partition basis and then per fragment index basis.
That is, there will be one job for every fragment index in a partition.
Performing this construction "up front" like this helps minimize the
interaction between nodes collecting hashes.pkl information.
Once a set of jobs for a partition has been constructed, those jobs are sent off
to threads for execution. The single job processor then performs the necessary
actions working closely with ssync to carry out its instructions. For data
reversion, the actual objects themselves are cleaned up via the ssync module and
once that partition's set of jobs is complete, the reconstructor will attempt to
remove the relevant directory structures.
The scenarios that job construction has to take into account include:
#. A partition directory with all fragment indexes matching the local node
index. This is the case where everything is where it belongs and we just
need to compare hashes and sync if needed, here we sync with our partners.
#. A partition directory with one local fragment index and mix of others. Here
we need to sync with our partners where the fragment index matches the
local_id; all others are sync'd with their home nodes and then deleted.
#. A partition directory with no local fragment index and just one or more of
others. Here we sync with just the home nodes for the fragment indexes that
we have and then all the local archives are deleted. This is the basic
handoff reversion case.
.. note::
A \"home node\" is the node where the fragment index encoded in the
fragment archive's filename matches the node index of a node in the primary
partition list.
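The following is a deliberately simplified sketch of the "one job per fragment
index per partition" idea; the real reconstructor job builder handles many more
details, so this helper and its job dict layout are assumptions made only for
illustration::

    from collections import defaultdict

    def build_jobs(data_files, local_frag_index):
        # Group fragment archives by the index encoded in their filenames,
        # e.g. '1418673556.92690#5.data' -> fragment index 5.
        by_index = defaultdict(list)
        for name in data_files:
            frag_index = int(name.rsplit('#', 1)[1].split('.')[0])
            by_index[frag_index].append(name)

        jobs = []
        for frag_index, files in by_index.items():
            # Archives matching the local index are synced with partners;
            # all other indexes are reverted to their home nodes.
            job_type = 'sync' if frag_index == local_frag_index else 'revert'
            jobs.append({'frag_index': frag_index,
                         'type': job_type,
                         'files': files})
        return jobs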
Node Communication
==================
The replicators talk to all nodes who have a copy of their object, typically
just 2 other nodes. For EC, having each reconstructor node talk to all nodes
would incur a large amount of overhead as there will typically be a much larger
number of nodes participating in the EC scheme. Therefore, the reconstructor is
built to talk to its adjacent nodes on the ring only. These nodes are typically
referred to as partners.
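A small sketch of the partner selection described above (the actual helper in
the reconstructor may differ; this only captures the idea of "left and right on
the ring")::

    def get_partners(local_index, primary_nodes):
        # The two adjacent primaries, wrapping around the ends of the list.
        n = len(primary_nodes)
        return [primary_nodes[(local_index - 1) % n],
                primary_nodes[(local_index + 1) % n]]

    nodes = ['node%d' % i for i in range(14)]   # e.g. a 10+4 EC ring
    print(get_partners(0, nodes))               # ['node13', 'node1']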
Reconstruction
==============
Reconstruction can be thought of as being much like replication but with an extra step
in the middle. The reconstructor is hard-wired to use ssync to determine what is
missing and desired by the other side. However, before an object is sent over
the wire it needs to be reconstructed from the remaining fragments as the local
fragment is just that - a different fragment index than what the other end is
asking for.
Thus, there are hooks in ssync for EC based policies. One case would be for
basic reconstruction which, at a high level, looks like this:
* Determine which nodes need to be contacted to collect other EC archives needed
to perform reconstruction.
* Update the etag and fragment index metadata elements of the newly constructed
fragment archive.
* Establish a connection to the target nodes and give ssync a DiskFileLike class
that it can stream data from.
The reader in this class gathers fragments from the nodes and uses PyECLib to
reconstruct each segment before yielding data back to ssync. Essentially what
this means is that data is buffered, in memory, on a per segment basis at the
node performing reconstruction and each segment is dynamically reconstructed and
delivered to `ssync_sender` where the `send_put()` method will ship them on
over. The sender is then responsible for deleting the objects as they are sent
in the case of data reversion.
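As a minimal sketch of that "extra step", and assuming PyECLib's ``ECDriver``
interface, rebuilding a single missing fragment from the survivors looks
roughly like this::

    from pyeclib.ec_iface import ECDriver

    driver = ECDriver(k=10, m=4, ec_type='jerasure_rs_vand')
    segment = b'x' * 1048576                  # one segment of object data
    fragments = driver.encode(segment)        # 14 fragments (10 data + 4 parity)

    # Pretend fragment index 5 is missing and rebuild it from the others.
    missing = 5
    available = fragments[:missing] + fragments[missing + 1:]
    rebuilt = driver.reconstruct(available, [missing])
    assert rebuilt[0] == fragments[missing]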
The Auditor
-----------
Because the auditor already operates on a per storage policy basis, there are no
specific auditor changes associated with EC. Each EC archive looks like, and is
treated like, a regular object from the perspective of the auditor. Therefore,
if the auditor finds bit-rot in an EC archive, it simply quarantines it and the
reconstructor will take care of the rest just as the replicator does for
replication policies.

View File

@ -8,22 +8,22 @@ feature is implemented throughout the entire code base so it is an important
concept in understanding Swift architecture.
As described in :doc:`overview_ring`, Swift uses modified hashing rings to
determine where data should reside in the cluster. There is a separate ring
for account databases, container databases, and there is also one object
ring per storage policy. Each object ring behaves exactly the same way
and is maintained in the same manner, but with policies, different devices
can belong to different rings with varying levels of replication. By supporting
multiple object rings, Swift allows the application and/or deployer to
essentially segregate the object storage within a single cluster. There are
many reasons why this might be desirable:
determine where data should reside in the cluster. There is a separate ring for
account databases, container databases, and there is also one object ring per
storage policy. Each object ring behaves exactly the same way and is maintained
in the same manner, but with policies, different devices can belong to different
rings. By supporting multiple object rings, Swift allows the application and/or
deployer to essentially segregate the object storage within a single cluster.
There are many reasons why this might be desirable:
* Different levels of replication: If a provider wants to offer, for example,
2x replication and 3x replication but doesn't want to maintain 2 separate clusters,
they would setup a 2x policy and a 3x policy and assign the nodes to their
respective rings.
* Different levels of durability: If a provider wants to offer, for example,
2x replication and 3x replication but doesn't want to maintain 2 separate
clusters, they would setup a 2x and a 3x replication policy and assign the
nodes to their respective rings. Furthermore, if a provider wanted to offer a
cold storage tier, they could create an erasure coded policy.
* Performance: Just as SSDs can be used as the exclusive members of an account or
database ring, an SSD-only object ring can be created as well and used to
* Performance: Just as SSDs can be used as the exclusive members of an account
or database ring, an SSD-only object ring can be created as well and used to
implement a low-latency/high performance policy.
* Collecting nodes into group: Different object rings may have different
@ -36,10 +36,12 @@ many reasons why this might be desirable:
.. note::
Today, choosing a different storage policy allows the use of different
object rings, but future policies (such as Erasure Coding) will also
change some of the actual code paths when processing a request. Also note
that Diskfile refers to backend object storage plug-in architecture.
Today, Swift supports two different policy types: Replication and Erasure
Code. Erasure Code policy is currently a beta release and should not be
used in a Production cluster. See :doc:`overview_erasure_code` for details.
Also note that Diskfile refers to backend object storage plug-in
architecture. See :doc:`development_ondisk_backends` for details.
-----------------------
Containers and Policies
@ -61,31 +63,33 @@ Policy-0 is considered the default). We will be covering the difference
between default and Policy-0 in the next section.
Policies are assigned when a container is created. Once a container has been
assigned a policy, it cannot be changed (unless it is deleted/recreated). The implications
on data placement/movement for large datasets would make this a task best left for
applications to perform. Therefore, if a container has an existing policy of,
for example 3x replication, and one wanted to migrate that data to a policy that specifies
a different replication level, the application would create another container
specifying the other policy name and then simply move the data from one container
to the other. Policies apply on a per container basis allowing for minimal application
awareness; once a container has been created with a specific policy, all objects stored
in it will be done so in accordance with that policy. If a container with a
specific name is deleted (requires the container be empty) a new container may
be created with the same name without any restriction on storage policy
enforced by the deleted container which previously shared the same name.
assigned a policy, it cannot be changed (unless it is deleted/recreated). The
implications on data placement/movement for large datasets would make this a
task best left for applications to perform. Therefore, if a container has an
existing policy of, for example 3x replication, and one wanted to migrate that
data to an Erasure Code policy, the application would create another container
specifying the other policy parameters and then simply move the data from one
container to the other. Policies apply on a per container basis allowing for
minimal application awareness; once a container has been created with a specific
policy, all objects stored in it will be done so in accordance with that policy.
If a container with a specific name is deleted (requires the container be empty)
a new container may be created with the same name without any restriction on
storage policy enforced by the deleted container which previously shared the
same name.
Containers have a many-to-one relationship with policies meaning that any number
of containers can share one policy. There is no limit to how many containers can use
a specific policy.
of containers can share one policy. There is no limit to how many containers
can use a specific policy.
The notion of associating a ring with a container introduces an interesting scenario:
What would happen if 2 containers of the same name were created with different
Storage Policies on either side of a network outage at the same time? Furthermore,
what would happen if objects were placed in those containers, a whole bunch of them,
and then later the network outage was restored? Well, without special care it would
be a big problem as an application could end up using the wrong ring to try and find
an object. Luckily there is a solution for this problem, a daemon known as the
Container Reconciler works tirelessly to identify and rectify this potential scenario.
The notion of associating a ring with a container introduces an interesting
scenario: What would happen if 2 containers of the same name were created with
different Storage Policies on either side of a network outage at the same time?
Furthermore, what would happen if objects were placed in those containers, a
whole bunch of them, and then later the network outage was restored? Well,
without special care it would be a big problem as an application could end up
using the wrong ring to try and find an object. Luckily there is a solution for
this problem, a daemon known as the Container Reconciler works tirelessly to
identify and rectify this potential scenario.
--------------------
Container Reconciler
@ -184,9 +188,9 @@ this case we would not use the default as it might not have the same
policy as legacy containers. When no other policies are defined, Swift
will always choose ``Policy-0`` as the default.
In other words, default means "create using this policy if nothing else is specified"
and ``Policy-0`` means "use the legacy policy if a container doesn't have one" which
really means use ``object.ring.gz`` for lookups.
In other words, default means "create using this policy if nothing else is
specified" and ``Policy-0`` means "use the legacy policy if a container doesn't
have one" which really means use ``object.ring.gz`` for lookups.
.. note::
@ -244,17 +248,19 @@ not mark the policy as deprecated to all nodes.
Configuring Policies
--------------------
Policies are configured in ``swift.conf`` and it is important that the deployer have a solid
understanding of the semantics for configuring policies. Recall that a policy must have
a corresponding ring file, so configuring a policy is a two-step process. First, edit
your ``/etc/swift/swift.conf`` file to add your new policy and, second, create the
corresponding policy object ring file.
Policies are configured in ``swift.conf`` and it is important that the deployer
have a solid understanding of the semantics for configuring policies. Recall
that a policy must have a corresponding ring file, so configuring a policy is a
two-step process. First, edit your ``/etc/swift/swift.conf`` file to add your
new policy and, second, create the corresponding policy object ring file.
See :doc:`policies_saio` for a step by step guide on adding a policy to the SAIO setup.
See :doc:`policies_saio` for a step by step guide on adding a policy to the SAIO
setup.
Note that each policy has a section starting with ``[storage-policy:N]`` where N is the
policy index. There's no reason other than readability that these be sequential but there
are a number of rules enforced by Swift when parsing this file:
Note that each policy has a section starting with ``[storage-policy:N]`` where N
is the policy index. There's no reason other than readability that these be
sequential but there are a number of rules enforced by Swift when parsing this
file:
* If a policy with index 0 is not declared and no other policies defined,
Swift will create one
@ -269,9 +275,11 @@ are a number of rules enforced by Swift when parsing this file:
* The policy name 'Policy-0' can only be used for the policy with index 0
* If any policies are defined, exactly one policy must be declared default
* Deprecated policies cannot be declared the default
* If no ``policy_type`` is provided, ``replication`` is the default value.
The following is an example of a properly configured ``swift.conf`` file. See :doc:`policies_saio`
for full instructions on setting up an all-in-one with this example configuration.::
The following is an example of a properly configured ``swift.conf`` file. See
:doc:`policies_saio` for full instructions on setting up an all-in-one with this
example configuration.::
[swift-hash]
# random unique strings that can never change (DO NOT LOSE)
@ -280,10 +288,12 @@ for full instructions on setting up an all-in-one with this example configuratio
[storage-policy:0]
name = gold
policy_type = replication
default = yes
[storage-policy:1]
name = silver
policy_type = replication
deprecated = yes
Review :ref:`default-policy` and :ref:`deprecate-policy` for more
@ -300,11 +310,14 @@ There are some other considerations when managing policies:
the desired policy section, but a deprecated policy may not also
be declared the default, and you must specify a default - so you
must have policy which is not deprecated at all times.
* The option ``policy_type`` is used to distinguish between different
policy types. The default value is ``replication``. When defining an EC
policy use the value ``erasure_coding``.
* The EC policy has additional required parameters. See
:doc:`overview_erasure_code` for details.
There will be additional parameters for policies as new features are added
(e.g., Erasure Code), but for now only a section name/index and name are
required. Once ``swift.conf`` is configured for a new policy, a new ring must be
created. The ring tools are not policy name aware so it's critical that the
Once ``swift.conf`` is configured for a new policy, a new ring must be created.
The ring tools are not policy name aware so it's critical that the
correct policy index be used when creating the new policy's ring file.
Additional object rings are created in the same manner as the legacy ring
except that '-N' is appended after the word ``object`` where N matches the
@ -404,43 +417,47 @@ Middleware
----------
Middleware can take advantage of policies through the :data:`.POLICIES` global
and by importing :func:`.get_container_info` to gain access to the policy
index associated with the container in question. From the index it
can then use the :data:`.POLICIES` singleton to grab the right ring. For example,
and by importing :func:`.get_container_info` to gain access to the policy index
associated with the container in question. From the index it can then use the
:data:`.POLICIES` singleton to grab the right ring. For example,
:ref:`list_endpoints` is policy aware using the means just described. Another
example is :ref:`recon` which will report the md5 sums for all of the rings.
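A minimal sketch of that pattern might look like the following; the function
name and the ``swift_source`` tag are invented for the example, and it assumes
:func:`.get_container_info` is importable from the proxy's base controller
module and that the cached container info exposes the policy index under the
``storage_policy`` key::

    from swift.common.storage_policy import POLICIES
    from swift.proxy.controllers.base import get_container_info

    def object_ring_for_request(env, app, swift_dir='/etc/swift'):
        # Pull the policy index out of the cached container info, then ask
        # the POLICIES singleton for the matching object ring.
        container_info = get_container_info(env, app, swift_source='EXAMPLE')
        policy_index = container_info['storage_policy']
        return POLICIES.get_object_ring(policy_index, swift_dir)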
Proxy Server
------------
The :ref:`proxy-server` module's role in Storage Policies is essentially to make sure the
correct ring is used as its member element. Before policies, the one object ring
would be instantiated when the :class:`.Application` class was instantiated and could
be overridden by test code via init parameter. With policies, however, there is
no init parameter and the :class:`.Application` class instead depends on the :data:`.POLICIES`
global singleton to retrieve the ring which is instantiated the first time it's
needed. So, instead of an object ring member of the :class:`.Application` class, there is
an accessor function, :meth:`~.Application.get_object_ring`, that gets the ring from :data:`.POLICIES`.
The :ref:`proxy-server` module's role in Storage Policies is essentially to make
sure the correct ring is used as its member element. Before policies, the one
object ring would be instantiated when the :class:`.Application` class was
instantiated and could be overridden by test code via init parameter. With
policies, however, there is no init parameter and the :class:`.Application`
class instead depends on the :data:`.POLICIES` global singleton to retrieve the
ring which is instantiated the first time it's needed. So, instead of an object
ring member of the :class:`.Application` class, there is an accessor function,
:meth:`~.Application.get_object_ring`, that gets the ring from
:data:`.POLICIES`.
In general, when any module running on the proxy requires an object ring, it
does so via first getting the policy index from the cached container info. The
exception is during container creation where it uses the policy name from the
request header to look up policy index from the :data:`.POLICIES` global. Once the
proxy has determined the policy index, it can use the :meth:`~.Application.get_object_ring` method
described earlier to gain access to the correct ring. It then has the responsibility
of passing the index information, not the policy name, on to the back-end servers
via the header ``X-Backend-Storage-Policy-Index``. Going the other way, the proxy also
strips the index out of headers that go back to clients, and makes sure they only
see the friendly policy names.
request header to look up policy index from the :data:`.POLICIES` global. Once
the proxy has determined the policy index, it can use the
:meth:`~.Application.get_object_ring` method described earlier to gain access to
the correct ring. It then has the responsibility of passing the index
information, not the policy name, on to the back-end servers via the header
``X-Backend-Storage-Policy-Index``. Going the other way, the proxy also strips
the index out of headers that go back to clients, and makes sure they only see
the friendly policy names.
On Disk Storage
---------------
Policies each have their own directories on the back-end servers and are identified by
their storage policy indexes. Organizing the back-end directory structures by policy
index helps keep track of things and also allows for sharing of disks between policies
which may or may not make sense depending on the needs of the provider. More
on this later, but for now be aware of the following directory naming convention:
Policies each have their own directories on the back-end servers and are
identified by their storage policy indexes. Organizing the back-end directory
structures by policy index helps keep track of things and also allows for
sharing of disks between policies which may or may not make sense depending on
the needs of the provider. More on this later, but for now be aware of the
following directory naming convention:
* ``/objects`` maps to objects associated with Policy-0
* ``/objects-N`` maps to storage policy index #N
@ -466,19 +483,19 @@ policy index and leaves the actual directory naming/structure mechanisms to
:class:`.Diskfile` being used will assure that data is properly located in the
tree based on its policy.
For the same reason, the :ref:`object-updater` also is policy aware. As previously
described, different policies use different async pending directories so the
updater needs to know how to scan them appropriately.
For the same reason, the :ref:`object-updater` also is policy aware. As
previously described, different policies use different async pending directories
so the updater needs to know how to scan them appropriately.
The :ref:`object-replicator` is policy aware in that, depending on the policy, it may have to
do drastically different things, or maybe not. For example, the difference in
handling a replication job for 2x versus 3x is trivial; however, the difference in
handling replication between 3x and erasure code is most definitely not. In
fact, the term 'replication' really isn't appropriate for some policies
like erasure code; however, the majority of the framework for collecting and
processing jobs is common. Thus, those functions in the replicator are
leveraged for all policies and then there is policy specific code required for
each policy, added when the policy is defined if needed.
The :ref:`object-replicator` is policy aware in that, depending on the policy,
it may have to do drastically different things, or maybe not. For example, the
difference in handling a replication job for 2x versus 3x is trivial; however,
the difference in handling replication between 3x and erasure code is most
definitely not. In fact, the term 'replication' really isn't appropriate for
some policies like erasure code; however, the majority of the framework for
collecting and processing jobs is common. Thus, those functions in the
replicator are leveraged for all policies and then there is policy specific code
required for each policy, added when the policy is defined if needed.
The ssync functionality is policy aware for the same reason. Some of the
other modules may not obviously be affected, but the back-end directory
@ -487,25 +504,26 @@ parameter. Therefore ssync being policy aware really means passing the
policy index along. See :class:`~swift.obj.ssync_sender` and
:class:`~swift.obj.ssync_receiver` for more information on ssync.
For :class:`.Diskfile` itself, being policy aware is all about managing the back-end
structure using the provided policy index. In other words, callers who get
a :class:`.Diskfile` instance provide a policy index and :class:`.Diskfile`'s job is to keep data
separated via this index (however it chooses) such that policies can share
the same media/nodes if desired. The included implementation of :class:`.Diskfile`
lays out the directory structure described earlier but that's owned within
:class:`.Diskfile`; external modules have no visibility into that detail. A common
function is provided to map various directory names and/or strings
based on their policy index. For example :class:`.Diskfile` defines :func:`.get_data_dir`
which builds off of a generic :func:`.get_policy_string` to consistently build
policy aware strings for various usage.
For :class:`.Diskfile` itself, being policy aware is all about managing the
back-end structure using the provided policy index. In other words, callers who
get a :class:`.Diskfile` instance provide a policy index and
:class:`.Diskfile`'s job is to keep data separated via this index (however it
chooses) such that policies can share the same media/nodes if desired. The
included implementation of :class:`.Diskfile` lays out the directory structure
described earlier but that's owned within :class:`.Diskfile`; external modules
have no visibility into that detail. A common function is provided to map
various directory names and/or strings based on their policy index. For example
:class:`.Diskfile` defines :func:`.get_data_dir` which builds off of a generic
:func:`.get_policy_string` to consistently build policy aware strings for
various usage.
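The resulting naming convention can be illustrated with a tiny standalone
sketch of what a :func:`.get_policy_string`-style helper produces; this is
illustrative only and is not the :class:`.Diskfile` code itself::

    def policy_string(base, policy_index):
        # Policy-0 keeps the legacy, unsuffixed name; any other policy index
        # gets a '-N' suffix appended to the base directory name.
        if int(policy_index) == 0:
            return base
        return '%s-%d' % (base, int(policy_index))

    assert policy_string('objects', 0) == 'objects'  # legacy layout
    assert policy_string('async_pending', 2) == 'async_pending-2'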
Container Server
----------------
The :ref:`container-server` plays a very important role in Storage Policies, it is
responsible for handling the assignment of a policy to a container and the
prevention of bad things like changing policies or picking the wrong policy
to use when nothing is specified (recall earlier discussion on Policy-0 versus
The :ref:`container-server` plays a very important role in Storage Policies; it
is responsible for handling the assignment of a policy to a container and the
prevention of bad things like changing policies or picking the wrong policy to
use when nothing is specified (recall earlier discussion on Policy-0 versus
default).
The :ref:`container-updater` is policy aware, however its job is very simple, to
@ -538,19 +556,19 @@ migrated to be fully compatible with the post-storage-policy queries without
having to fall back and retry queries with the legacy schema to service
container read requests.
The :ref:`container-sync-daemon` functionality only needs to be policy aware in that it
accesses the object rings. Therefore, it needs to pull the policy index
out of the container information and use it to select the appropriate
object ring from the :data:`.POLICIES` global.
The :ref:`container-sync-daemon` functionality only needs to be policy aware in
that it accesses the object rings. Therefore, it needs to pull the policy index
out of the container information and use it to select the appropriate object
ring from the :data:`.POLICIES` global.
Account Server
--------------
The :ref:`account-server`'s role in Storage Policies is really limited to reporting.
When a HEAD request is made on an account (see example provided earlier),
the account server is provided with the storage policy index and builds
the ``object_count`` and ``byte_count`` information for the client on a per
policy basis.
The :ref:`account-server`'s role in Storage Policies is really limited to
reporting. When a HEAD request is made on an account (see example provided
earlier), the account server is provided with the storage policy index and
builds the ``object_count`` and ``byte_count`` information for the client on a
per policy basis.
The account servers are able to report per-storage-policy object and byte
counts because of some policy specific DB schema changes. A policy specific
@ -564,23 +582,23 @@ pre-storage-policy accounts by altering the DB schema and populating the
point in time.
The per-storage-policy object and byte counts are not updated with each object
PUT and DELETE request, instead container updates to the account server are performed
asynchronously by the ``swift-container-updater``.
PUT and DELETE request, instead container updates to the account server are
performed asynchronously by the ``swift-container-updater``.
.. _upgrade-policy:
Upgrading and Confirming Functionality
--------------------------------------
Upgrading to a version of Swift that has Storage Policy support is not difficult,
in fact, the cluster administrator isn't required to make any special configuration
changes to get going. Swift will automatically begin using the existing object
ring as both the default ring and the Policy-0 ring. Adding the declaration of
policy 0 is totally optional and in its absence, the name given to the implicit
policy 0 will be 'Policy-0'. Let's say for testing purposes that you wanted to take
an existing cluster that already has lots of data on it and upgrade to Swift with
Storage Policies. From there you want to go ahead and create a policy and test a
few things out. All you need to do is:
Upgrading to a version of Swift that has Storage Policy support is not
difficult; in fact, the cluster administrator isn't required to make any
special configuration changes to get going. Swift will automatically begin
using the
existing object ring as both the default ring and the Policy-0 ring. Adding the
declaration of policy 0 is totally optional and in its absence, the name given
to the implicit policy 0 will be 'Policy-0'. Let's say for testing purposes
that you wanted to take an existing cluster that already has lots of data on it
and upgrade to Swift with Storage Policies. From there you want to go ahead and
create a policy and test a few things out. All you need to do is:
#. Upgrade all of your Swift nodes to a policy-aware version of Swift
#. Define your policies in ``/etc/swift/swift.conf``
View File
@ -111,11 +111,53 @@ Another improvement planned all along the way is separating the local disk
structure from the protocol path structure. This separation will allow ring
resizing at some point, or at least ring-doubling.
FOR NOW, IT IS NOT RECOMMENDED TO USE SSYNC ON PRODUCTION CLUSTERS. Some of us
will be using it in a limited fashion to look for any subtle issues, tuning,
etc., but generally ssync is an experimental feature. In its current
implementation it is probably going to be a bit slower than rsync, but if all
goes according to plan it will end up much faster.
Note that for objects being stored with an Erasure Code policy, the replicator
daemon is not involved. Instead, the reconstructor is used by Erasure Code
policies and is analogous to the replicator for Replication type policies.
See :doc:`overview_erasure_code` for complete information on both Erasure Code
support as well as the reconstructor.
----------
Hashes.pkl
----------
The hashes.pkl file is a key element for both replication and reconstruction
(for Erasure Coding). Both daemons use this file to determine if any kind of
action is required between nodes that are participating in the durability
scheme. The file itself is a pickled dictionary with a slightly different
format depending on whether the policy is Replication or Erasure Code. In
either case, however, the same basic information is exchanged between the
nodes. The pickled dictionary is keyed on suffix directory names and the value
for each key is the MD5 hash of the directory listing for that suffix. In this
manner, the daemon can quickly identify differences between local and remote
suffix directories on a per partition basis, as the scope of any one hashes.pkl
file is a partition directory.
For Erasure Code policies, there is a little more information required. An
object's hash directory may contain multiple fragments of a single object in
the event that the node is acting as a handoff or perhaps if a rebalance is
underway. Each fragment of an object is stored with a fragment index, so
the hashes.pkl for an Erasure Code partition will still be a dictionary
keyed on the suffix directory name; however, the value is another dictionary
keyed on the fragment index, with the corresponding MD5 hash for each fragment
index as its value. Some files within an object hash directory don't require a
fragment index, so None is used to represent those. Below are examples of what
these dictionaries might look like.
Replication hashes.pkl::
{'a43': '72018c5fbfae934e1f56069ad4425627',
'b23': '12348c5fbfae934e1f56069ad4421234'}
Erasure Code hashes.pkl::
{'a43': {None: '72018c5fbfae934e1f56069ad4425627',
2: 'b6dd6db937cb8748f50a5b6e4bc3b808'},
'b23': {None: '12348c5fbfae934e1f56069ad4421234',
1: '45676db937cb8748f50a5b6e4bc34567'}}
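As an illustrative sketch - not the actual replicator or reconstructor code -
a daemon could compare a local and a remote Replication-style hashes.pkl like
this to decide which suffix directories need attention::

    def suffixes_needing_sync(local_hashes, remote_hashes):
        # Any suffix whose hash differs, or that is missing on either side,
        # is a candidate for syncing between the two nodes.
        suffixes = set(local_hashes) | set(remote_hashes)
        return sorted(s for s in suffixes
                      if local_hashes.get(s) != remote_hashes.get(s))

    local = {'a43': '72018c5fbfae934e1f56069ad4425627',
             'b23': '12348c5fbfae934e1f56069ad4421234'}
    remote = {'a43': '72018c5fbfae934e1f56069ad4425627',
              'b23': 'ffff8c5fbfae934e1f56069ad442ffff'}
    assert suffixes_needing_sync(local, remote) == ['b23']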
-----------------------------
View File
@ -167,6 +167,14 @@ use = egg:swift#recon
#
# Maximum amount of time to spend syncing each container per pass
# container_time = 60
#
# Maximum amount of time in seconds for the connection attempt
# conn_timeout = 5
# Server errors from requests will be retried by default
# request_tries = 3
#
# Internal client config file path
# internal_client_conf_path = /etc/swift/internal-client.conf
# Note: Put it at the beginning of the pipeline to profile all middleware. But
# it is safer to put this after healthcheck.
View File
@ -0,0 +1,42 @@
[DEFAULT]
# swift_dir = /etc/swift
# user = swift
# You can specify default log routing here if you want:
# log_name = swift
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger
# log_custom_handlers =
#
# If set, log_udp_host will override log_address
# log_udp_host =
# log_udp_port = 514
#
# You can enable StatsD logging here:
# log_statsd_host = localhost
# log_statsd_port = 8125
# log_statsd_default_sample_rate = 1.0
# log_statsd_sample_rate_factor = 1.0
# log_statsd_metric_prefix =
[pipeline:main]
pipeline = catch_errors proxy-logging cache proxy-server
[app:proxy-server]
use = egg:swift#proxy
# See proxy-server.conf-sample for options
[filter:cache]
use = egg:swift#memcache
# See proxy-server.conf-sample for options
[filter:proxy-logging]
use = egg:swift#proxy_logging
[filter:catch_errors]
use = egg:swift#catch_errors
# See proxy-server.conf-sample for options
View File
@ -211,6 +211,29 @@ use = egg:swift#recon
# removed when it has successfully replicated to all the canonical nodes.
# handoff_delete = auto
[object-reconstructor]
# You can override the default log routing for this app here (don't use set!):
# Unless otherwise noted, each setting below has the same meaning as described
# in the [object-replicator] section, however these settings apply to the EC
# reconstructor
#
# log_name = object-reconstructor
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# daemonize = on
# run_pause = 30
# concurrency = 1
# stats_interval = 300
# node_timeout = 10
# http_timeout = 60
# lockup_timeout = 1800
# reclaim_age = 604800
# ring_check_interval = 15
# recon_cache_path = /var/cache/swift
# handoffs_first = False
[object-updater]
# You can override the default log routing for this app here (don't use set!):
# log_name = object-updater
View File
@ -17,9 +17,10 @@ bind_port = 8080
# to /info. You can withhold subsections by separating the dict level with a
# ".". The following would cause the sections 'container_quotas' and 'tempurl'
# to not be listed, and the key max_failed_deletes would be removed from
# bulk_delete. Default is empty, allowing all registered features to be listed
# via HTTP GET /info.
# disallowed_sections = container_quotas, tempurl, bulk_delete.max_failed_deletes
# bulk_delete. Default value is 'swift.valid_api_versions' which allows all
# registered features to be listed via HTTP GET /info except
# swift.valid_api_versions information
# disallowed_sections = swift.valid_api_versions, container_quotas, tempurl
# Use an integer to override the number of pre-forked processes that will
# accept connections. Should default to the number of effective cpu
View File
@ -22,9 +22,13 @@ swift_hash_path_prefix = changeme
# defined you must define a policy with index 0 and you must specify a
# default. It is recommended you always define a section for
# storage-policy:0.
#
# A 'policy_type' argument is also supported but is not mandatory. Default
# policy type 'replication' is used when 'policy_type' is unspecified.
[storage-policy:0]
name = Policy-0
default = yes
#policy_type = replication
# the following section would declare a policy called 'silver', the number of
# replicas will be determined by how the ring is built. In this example the
@ -39,9 +43,45 @@ default = yes
# current default.
#[storage-policy:1]
#name = silver
#policy_type = replication
# The following declares a storage policy of type 'erasure_coding' which uses
# Erasure Coding for data reliability. The 'erasure_coding' storage policy in
# Swift is available as a "beta". Please refer to Swift documentation for
# details on how the 'erasure_coding' storage policy is implemented.
#
# Swift uses PyECLib, a Python Erasure coding API library, for encode/decode
# operations. Please refer to Swift documentation for details on how to
# install PyECLib.
#
# When defining an EC policy, 'policy_type' needs to be 'erasure_coding' and
# EC configuration parameters 'ec_type', 'ec_num_data_fragments' and
# 'ec_num_parity_fragments' must be specified. 'ec_type' is chosen from the
# list of EC backends supported by PyECLib. The ring configured for the
# storage policy must have its "replica" count configured to
# 'ec_num_data_fragments' + 'ec_num_parity_fragments' - this requirement is
# validated when services start. 'ec_object_segment_size' is the amount of
# data that will be buffered up before feeding a segment into the
# encoder/decoder. More information about these configuration options and
# supported `ec_type` schemes is available in the Swift documentation. Please
# refer to Swift documentation for details on how to configure EC policies.
#
# The example 'deepfreeze10-4' policy defined below is a _sample_
# configuration with 10 'data' and 4 'parity' fragments. 'ec_type'
# defines the Erasure Coding scheme. 'jerasure_rs_vand' (Reed-Solomon
# Vandermonde) is used as an example below.
#
#[storage-policy:2]
#name = deepfreeze10-4
#policy_type = erasure_coding
#ec_type = jerasure_rs_vand
#ec_num_data_fragments = 10
#ec_num_parity_fragments = 4
#ec_object_segment_size = 1048576
# The swift-constraints section sets the basic constraints on data
# saved in the swift cluster. These constraints are automatically
# published by the proxy server in responses to /info requests.
[swift-constraints]
@ -116,3 +156,14 @@ default = yes
# of a container name
#max_container_name_length = 256
# By default all REST API calls should use "v1" or "v1.0" as the version string,
# for example "/v1/account". This can be manually overridden to make this
# backward-compatible, in case a different version string has been used before.
# Use a comma-separated list in case of multiple allowed versions, for example
# valid_api_versions = v0,v1,v2
# This is only enforced for account, container and object requests. The allowed
# api versions are by default excluded from /info.
# valid_api_versions = v1,v1.0
View File
@ -3,9 +3,10 @@
# process, which may cause wedges in the gate later.
dnspython>=1.9.4
eventlet>=0.9.15
eventlet>=0.16.1,!=0.17.0
greenlet>=0.3.1
netifaces>=0.5,!=0.10.0,!=0.10.1
pastedeploy>=1.3.3
simplejson>=2.0.9
xattr>=0.4
PyECLib>=1.0.3
View File
@ -51,6 +51,7 @@ scripts =
bin/swift-object-expirer
bin/swift-object-info
bin/swift-object-replicator
bin/swift-object-reconstructor
bin/swift-object-server
bin/swift-object-updater
bin/swift-oldies
View File
@ -19,6 +19,7 @@ from swift import gettext_ as _
from logging import DEBUG
from math import sqrt
from time import time
import itertools
from eventlet import GreenPool, sleep, Timeout
@ -432,7 +433,7 @@ class AccountReaper(Daemon):
* See also: :func:`swift.common.ring.Ring.get_nodes` for a description
of the container node dicts.
"""
container_nodes = list(container_nodes)
cnodes = itertools.cycle(container_nodes)
try:
ring = self.get_object_ring(policy_index)
except PolicyError:
@ -443,7 +444,7 @@ class AccountReaper(Daemon):
successes = 0
failures = 0
for node in nodes:
cnode = container_nodes.pop()
cnode = next(cnodes)
try:
direct_delete_object(
node, part, account, container, obj,
View File
@ -24,7 +24,7 @@ from swift.common.request_helpers import is_sys_meta, is_user_meta, \
from swift.account.backend import AccountBroker, DATADIR as ABDATADIR
from swift.container.backend import ContainerBroker, DATADIR as CBDATADIR
from swift.obj.diskfile import get_data_dir, read_metadata, DATADIR_BASE, \
extract_policy_index
extract_policy
from swift.common.storage_policy import POLICIES
@ -251,6 +251,10 @@ def print_obj_metadata(metadata):
:raises: ValueError
"""
user_metadata = {}
sys_metadata = {}
other_metadata = {}
if not metadata:
raise ValueError('Metadata is None')
path = metadata.pop('name', '')
@ -280,7 +284,25 @@ def print_obj_metadata(metadata):
else:
print 'Timestamp: Not found in metadata'
print 'User Metadata: %s' % metadata
for key, value in metadata.iteritems():
if is_user_meta('Object', key):
user_metadata[key] = value
elif is_sys_meta('Object', key):
sys_metadata[key] = value
else:
other_metadata[key] = value
def print_metadata(title, items):
print title
if items:
for meta_key in sorted(items):
print ' %s: %s' % (meta_key, items[meta_key])
else:
print ' No metadata found'
print_metadata('System Metadata:', sys_metadata)
print_metadata('User Metadata:', user_metadata)
print_metadata('Other Metadata:', other_metadata)
def print_info(db_type, db_file, swift_dir='/etc/swift'):
@ -330,7 +352,7 @@ def print_obj(datafile, check_etag=True, swift_dir='/etc/swift',
:param swift_dir: the path on disk to rings
:param policy_name: optionally the name to use when finding the ring
"""
if not os.path.exists(datafile) or not datafile.endswith('.data'):
if not os.path.exists(datafile):
print "Data file doesn't exist"
raise InfoSystemExit()
if not datafile.startswith(('/', './')):
@ -341,10 +363,7 @@ def print_obj(datafile, check_etag=True, swift_dir='/etc/swift',
datadir = DATADIR_BASE
# try to extract policy index from datafile disk path
try:
policy_index = extract_policy_index(datafile)
except ValueError:
pass
policy_index = int(extract_policy(datafile) or POLICIES.legacy)
try:
if policy_index:
View File
@ -330,6 +330,27 @@ class SwiftRecon(object):
print("[async_pending] - No hosts returned valid data.")
print("=" * 79)
def driveaudit_check(self, hosts):
"""
Obtain and print drive audit error statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]
"""
scan = {}
recon = Scout("driveaudit", self.verbose, self.suppress_errors,
self.timeout)
print("[%s] Checking drive-audit errors" % self._ptime())
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
scan[url] = response['drive_audit_errors']
stats = self._gen_stats(scan.values(), 'drive_audit_errors')
if stats['reported'] > 0:
self._print_stats(stats)
else:
print("[drive_audit_errors] - No hosts returned valid data.")
print("=" * 79)
def umount_check(self, hosts):
"""
Check for and print unmounted drives
@ -800,7 +821,7 @@ class SwiftRecon(object):
print("No hosts returned valid data.")
print("=" * 79)
def disk_usage(self, hosts, top=0, human_readable=False):
def disk_usage(self, hosts, top=0, lowest=0, human_readable=False):
"""
Obtain and print disk usage statistics
@ -814,6 +835,7 @@ class SwiftRecon(object):
raw_total_avail = []
percents = {}
top_percents = [(None, 0)] * top
low_percents = [(None, 100)] * lowest
recon = Scout("diskusage", self.verbose, self.suppress_errors,
self.timeout)
print("[%s] Checking disk usage now" % self._ptime())
@ -837,6 +859,13 @@ class SwiftRecon(object):
top_percents.sort(key=lambda x: -x[1])
top_percents.pop()
break
for ident, oused in low_percents:
if oused > used:
low_percents.append(
(url + ' ' + entry['device'], used))
low_percents.sort(key=lambda x: x[1])
low_percents.pop()
break
stats[url] = hostusage
for url in stats:
@ -882,6 +911,13 @@ class SwiftRecon(object):
url, device = ident.split()
host = urlparse(url).netloc.split(':')[0]
print('%.02f%% %s' % (used, '%-15s %s' % (host, device)))
if low_percents:
print('LOWEST %s' % lowest)
for ident, used in low_percents:
if ident:
url, device = ident.split()
host = urlparse(url).netloc.split(':')[0]
print('%.02f%% %s' % (used, '%-15s %s' % (host, device)))
def main(self):
"""
@ -930,8 +966,13 @@ class SwiftRecon(object):
"local copy")
args.add_option('--sockstat', action="store_true",
help="Get cluster socket usage stats")
args.add_option('--driveaudit', action="store_true",
help="Get drive audit error stats")
args.add_option('--top', type='int', metavar='COUNT', default=0,
help='Also show the top COUNT entries in rank order.')
args.add_option('--lowest', type='int', metavar='COUNT', default=0,
help='Also show the lowest COUNT entries in rank \
order.')
args.add_option('--all', action="store_true",
help="Perform all checks. Equal to \t\t\t-arudlq "
"--md5 --sockstat --auditor --updater --expirer")
@ -987,11 +1028,13 @@ class SwiftRecon(object):
self.auditor_check(hosts)
self.umount_check(hosts)
self.load_check(hosts)
self.disk_usage(hosts, options.top, options.human_readable)
self.disk_usage(hosts, options.top, options.lowest,
options.human_readable)
self.get_ringmd5(hosts, swift_dir)
self.quarantine_check(hosts)
self.socket_usage(hosts)
self.server_type_check(hosts)
self.driveaudit_check(hosts)
else:
if options.async:
if self.server_type == 'object':
@ -1025,7 +1068,8 @@ class SwiftRecon(object):
if options.loadstats:
self.load_check(hosts)
if options.diskusage:
self.disk_usage(hosts, options.top, options.human_readable)
self.disk_usage(hosts, options.top, options.lowest,
options.human_readable)
if options.md5:
self.get_ringmd5(hosts, swift_dir)
self.get_swiftconfmd5(hosts)
@ -1033,6 +1077,8 @@ class SwiftRecon(object):
self.quarantine_check(hosts)
if options.sockstat:
self.socket_usage(hosts)
if options.driveaudit:
self.driveaudit_check(hosts)
def main():
View File
@ -14,12 +14,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from errno import EEXIST
from itertools import islice, izip
from operator import itemgetter
from os import mkdir
from os.path import basename, abspath, dirname, exists, join as pathjoin
from sys import argv as sys_argv, exit, stderr
from sys import argv as sys_argv, exit, stderr, stdout
from textwrap import wrap
from time import time
import optparse
@ -32,7 +34,7 @@ from swift.common.ring.utils import validate_args, \
validate_and_normalize_ip, build_dev_from_opts, \
parse_builder_ring_filename_args, parse_search_value, \
parse_search_values_from_opts, parse_change_values_from_opts, \
dispersion_report
dispersion_report, validate_device_name
from swift.common.utils import lock_parent_directory
MAJOR_VERSION = 1
@ -218,6 +220,9 @@ def _parse_add_values(argvish):
while i < len(rest) and rest[i] != '_':
i += 1
device_name = rest[1:i]
if not validate_device_name(device_name):
raise ValueError('Invalid device name')
rest = rest[i:]
meta = ''
@ -831,6 +836,8 @@ swift-ring-builder <builder_file> rebalance [options]
help='Force a rebalanced ring to save even '
'if < 1% of parts changed')
parser.add_option('-s', '--seed', help="seed to use for rebalance")
parser.add_option('-d', '--debug', action='store_true',
help="print debug information")
options, args = parser.parse_args(argv)
def get_seed(index):
@ -841,6 +848,14 @@ swift-ring-builder <builder_file> rebalance [options]
except IndexError:
pass
if options.debug:
logger = logging.getLogger("swift.ring.builder")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(stdout)
formatter = logging.Formatter("%(levelname)s: %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
devs_changed = builder.devs_changed
try:
last_balance = builder.get_balance()
@ -889,11 +904,12 @@ swift-ring-builder <builder_file> rebalance [options]
status = EXIT_SUCCESS
if builder.dispersion > 0:
print '-' * 79
print('NOTE: Dispersion of %.06f indicates some parts are not\n'
' optimally dispersed.\n\n'
' You may want adjust some device weights, increase\n'
' the overload or review the dispersion report.' %
builder.dispersion)
print(
'NOTE: Dispersion of %.06f indicates some parts are not\n'
' optimally dispersed.\n\n'
' You may want to adjust some device weights, increase\n'
' the overload or review the dispersion report.' %
builder.dispersion)
status = EXIT_WARNING
print '-' * 79
elif balance > 5 and balance / 100.0 > builder.overload:
View File
@ -35,6 +35,7 @@ CONTAINER_LISTING_LIMIT = 10000
ACCOUNT_LISTING_LIMIT = 10000
MAX_ACCOUNT_NAME_LENGTH = 256
MAX_CONTAINER_NAME_LENGTH = 256
VALID_API_VERSIONS = ["v1", "v1.0"]
# If adding an entry to DEFAULT_CONSTRAINTS, note that
# these constraints are automatically published by the
@ -52,6 +53,7 @@ DEFAULT_CONSTRAINTS = {
'account_listing_limit': ACCOUNT_LISTING_LIMIT,
'max_account_name_length': MAX_ACCOUNT_NAME_LENGTH,
'max_container_name_length': MAX_CONTAINER_NAME_LENGTH,
'valid_api_versions': VALID_API_VERSIONS,
}
SWIFT_CONSTRAINTS_LOADED = False
@ -72,13 +74,17 @@ def reload_constraints():
SWIFT_CONSTRAINTS_LOADED = True
for name in DEFAULT_CONSTRAINTS:
try:
value = int(constraints_conf.get('swift-constraints', name))
value = constraints_conf.get('swift-constraints', name)
except NoOptionError:
pass
except NoSectionError:
# We are never going to find the section for another option
break
else:
try:
value = int(value)
except ValueError:
value = utils.list_from_csv(value)
OVERRIDE_CONSTRAINTS[name] = value
for name, default in DEFAULT_CONSTRAINTS.items():
value = OVERRIDE_CONSTRAINTS.get(name, default)
@ -204,6 +210,19 @@ def check_object_creation(req, object_name):
return check_metadata(req, 'object')
def check_dir(root, drive):
"""
Verify that the path to the device is a directory and is a lesser
constraint that is enforced when a full mount_check isn't possible
with, for instance, a VM using loopback or partitions.
:param root: base path where the dir is
:param drive: drive name to be checked
:returns: True if it is a valid directory, False otherwise
"""
return os.path.isdir(os.path.join(root, drive))
def check_mount(root, drive):
"""
Verify that the path to the device is a mount point and mounted. This
@ -399,3 +418,13 @@ def check_account_format(req, account):
request=req,
body='Account name cannot contain slashes')
return account
def valid_api_version(version):
""" Checks if the requested version is valid.
Currently Swift only supports "v1" and "v1.0". """
global VALID_API_VERSIONS
if not isinstance(VALID_API_VERSIONS, list):
VALID_API_VERSIONS = [str(VALID_API_VERSIONS)]
return version in VALID_API_VERSIONS
View File
@ -31,10 +31,32 @@ class SwiftException(Exception):
pass
class PutterConnectError(Exception):
def __init__(self, status=None):
self.status = status
class InvalidTimestamp(SwiftException):
pass
class InsufficientStorage(SwiftException):
pass
class FooterNotSupported(SwiftException):
pass
class MultiphasePUTNotSupported(SwiftException):
pass
class SuffixSyncError(SwiftException):
pass
class DiskFileError(SwiftException):
pass
@ -103,6 +125,10 @@ class ConnectionTimeout(Timeout):
pass
class ResponseTimeout(Timeout):
pass
class DriveNotMounted(SwiftException):
pass
@ -173,6 +199,10 @@ class MimeInvalid(SwiftException):
pass
class APIVersionError(SwiftException):
pass
class ClientException(Exception):
def __init__(self, msg, http_scheme='', http_host='', http_port='',
View File
@ -33,7 +33,8 @@ ALL_SERVERS = ['account-auditor', 'account-server', 'container-auditor',
'container-replicator', 'container-reconciler',
'container-server', 'container-sync',
'container-updater', 'object-auditor', 'object-server',
'object-expirer', 'object-replicator', 'object-updater',
'object-expirer', 'object-replicator',
'object-reconstructor', 'object-updater',
'proxy-server', 'account-replicator', 'account-reaper']
MAIN_SERVERS = ['proxy-server', 'account-server', 'container-server',
'object-server']
@ -434,8 +435,11 @@ class Server(object):
if not conf_files:
# maybe there's a config file(s) out there, but I couldn't find it!
if not kwargs.get('quiet'):
print _('Unable to locate config %sfor %s') % (
('number %s ' % number if number else ''), self.server)
if number:
print _('Unable to locate config number %s for %s' % (
number, self.server))
else:
print _('Unable to locate config for %s' % (self.server))
if kwargs.get('verbose') and not kwargs.get('quiet'):
if found_conf_files:
print _('Found configs:')
View File
@ -218,7 +218,14 @@ class FormPost(object):
env, attrs['boundary'])
start_response(status, headers)
return [body]
except (FormInvalid, MimeInvalid, EOFError) as err:
except MimeInvalid:
body = 'FormPost: invalid starting boundary'
start_response(
'400 Bad Request',
(('Content-Type', 'text/plain'),
('Content-Length', str(len(body)))))
return [body]
except (FormInvalid, EOFError) as err:
body = 'FormPost: %s' % err
start_response(
'400 Bad Request',
View File
@ -106,9 +106,9 @@ class KeystoneAuth(object):
operator_roles
service_roles
For backward compatibility, no prefix implies the parameter
applies to all reseller_prefixes. Here is an example, using two
prefixes::
For backward compatibility, if either of these parameters is specified
without a prefix then it applies to all reseller_prefixes. Here is an
example, using two prefixes::
reseller_prefix = AUTH, SERVICE
# The next three lines have identical effects (since the first applies
@ -242,11 +242,11 @@ class KeystoneAuth(object):
# using _integral_keystone_identity to replace current
# _keystone_identity. The purpose of keeping it in this release it for
# back compatibility.
if environ.get('HTTP_X_IDENTITY_STATUS') != 'Confirmed':
if (environ.get('HTTP_X_IDENTITY_STATUS') != 'Confirmed'
or environ.get(
'HTTP_X_SERVICE_IDENTITY_STATUS') not in (None, 'Confirmed')):
return
roles = []
if 'HTTP_X_ROLES' in environ:
roles = environ['HTTP_X_ROLES'].split(',')
roles = list_from_csv(environ.get('HTTP_X_ROLES', ''))
identity = {'user': environ.get('HTTP_X_USER_NAME'),
'tenant': (environ.get('HTTP_X_TENANT_ID'),
environ.get('HTTP_X_TENANT_NAME')),
View File
@ -53,6 +53,8 @@ class ReconMiddleware(object):
'container.recon')
self.account_recon_cache = os.path.join(self.recon_cache_path,
'account.recon')
self.drive_recon_cache = os.path.join(self.recon_cache_path,
'drive.recon')
self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz')
self.rings = [self.account_ring_path, self.container_ring_path]
@ -124,6 +126,11 @@ class ReconMiddleware(object):
return self._from_recon_cache(['async_pending'],
self.object_recon_cache)
def get_driveaudit_error(self):
"""get # of drive audit errors"""
return self._from_recon_cache(['drive_audit_errors'],
self.drive_recon_cache)
def get_replication_info(self, recon_type):
"""get replication info"""
if recon_type == 'account':
@ -359,6 +366,8 @@ class ReconMiddleware(object):
content = self.get_socket_info()
elif rcheck == "version":
content = self.get_version()
elif rcheck == "driveaudit":
content = self.get_driveaudit_error()
else:
content = "Invalid path: %s" % req.path
return Response(request=req, status="404 Not Found",
View File
@ -399,7 +399,7 @@ class TempAuth(object):
s = base64.encodestring(hmac.new(key, msg, sha1).digest()).strip()
if s != sign:
return None
groups = self._get_user_groups(account, account_user)
groups = self._get_user_groups(account, account_user, account_id)
return groups
View File
@ -26,10 +26,12 @@ import time
from contextlib import contextmanager
from urllib import unquote
from swift import gettext_ as _
from swift.common.storage_policy import POLICIES
from swift.common.constraints import FORMAT2CONTENT_TYPE
from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.http import is_success
from swift.common.swob import HTTPBadRequest, HTTPNotAcceptable
from swift.common.swob import (HTTPBadRequest, HTTPNotAcceptable,
HTTPServiceUnavailable)
from swift.common.utils import split_path, validate_device_partition
from swift.common.wsgi import make_subrequest
@ -82,21 +84,27 @@ def get_listing_content_type(req):
def get_name_and_placement(request, minsegs=1, maxsegs=None,
rest_with_last=False):
"""
Utility function to split and validate the request path and
storage_policy_index. The storage_policy_index is extracted from
the headers of the request and converted to an integer, and then the
args are passed through to :meth:`split_and_validate_path`.
Utility function to split and validate the request path and storage
policy. The storage policy index is extracted from the headers of
the request and converted to a StoragePolicy instance. The
remaining args are passed through to
:meth:`split_and_validate_path`.
:returns: a list, result of :meth:`split_and_validate_path` with
storage_policy_index appended on the end
:raises: HTTPBadRequest
the BaseStoragePolicy instance appended on the end
:raises: HTTPServiceUnavailable if the path is invalid or no policy exists
with the extracted policy_index.
"""
policy_idx = request.headers.get('X-Backend-Storage-Policy-Index', '0')
policy_idx = int(policy_idx)
policy_index = request.headers.get('X-Backend-Storage-Policy-Index')
policy = POLICIES.get_by_index(policy_index)
if not policy:
raise HTTPServiceUnavailable(
body=_("No policy with index %s") % policy_index,
request=request, content_type='text/plain')
results = split_and_validate_path(request, minsegs=minsegs,
maxsegs=maxsegs,
rest_with_last=rest_with_last)
results.append(policy_idx)
results.append(policy)
return results
View File
@ -17,6 +17,7 @@ import bisect
import copy
import errno
import itertools
import logging
import math
import random
import cPickle as pickle
@ -33,6 +34,16 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \
MAX_BALANCE = 999.99
try:
# python 2.7+
from logging import NullHandler
except ImportError:
# python 2.6
class NullHandler(logging.Handler):
def emit(self, *a, **kw):
pass
class RingBuilder(object):
"""
Used to build swift.common.ring.RingData instances to be written to disk
@ -96,6 +107,11 @@ class RingBuilder(object):
self._remove_devs = []
self._ring = None
self.logger = logging.getLogger("swift.ring.builder")
if not self.logger.handlers:
# silence "no handler for X" error messages
self.logger.addHandler(NullHandler())
def weight_of_one_part(self):
"""
Returns the weight of each partition as calculated from the
@ -355,6 +371,7 @@ class RingBuilder(object):
self._ring = None
if self._last_part_moves_epoch is None:
self.logger.debug("New builder; performing initial balance")
self._initial_balance()
self.devs_changed = False
self._build_dispersion_graph()
@ -363,16 +380,23 @@ class RingBuilder(object):
self._update_last_part_moves()
last_balance = 0
new_parts, removed_part_count = self._adjust_replica2part2dev_size()
self.logger.debug(
"%d new parts and %d removed parts from replica-count change",
len(new_parts), removed_part_count)
changed_parts += removed_part_count
self._set_parts_wanted()
self._reassign_parts(new_parts)
changed_parts += len(new_parts)
while True:
reassign_parts = self._gather_reassign_parts()
self._reassign_parts(reassign_parts)
changed_parts += len(reassign_parts)
self.logger.debug("Gathered %d parts", changed_parts)
self._reassign_parts(reassign_parts)
self.logger.debug("Assigned %d parts", changed_parts)
while self._remove_devs:
self.devs[self._remove_devs.pop()['id']] = None
remove_dev_id = self._remove_devs.pop()['id']
self.logger.debug("Removing dev %d", remove_dev_id)
self.devs[remove_dev_id] = None
balance = self.get_balance()
if balance < 1 or abs(last_balance - balance) < 1 or \
changed_parts == self.parts:
@ -786,6 +810,9 @@ class RingBuilder(object):
if dev_id in dev_ids:
self._last_part_moves[part] = 0
removed_dev_parts[part].append(replica)
self.logger.debug(
"Gathered %d/%d from dev %d [dev removed]",
part, replica, dev_id)
# Now we gather partitions that are "at risk" because they aren't
# currently sufficient spread out across the cluster.
@ -859,6 +886,9 @@ class RingBuilder(object):
dev['parts'] -= 1
removed_replica = True
moved_parts += 1
self.logger.debug(
"Gathered %d/%d from dev %d [dispersion]",
part, replica, dev['id'])
break
if removed_replica:
for tier in tfd[dev['id']]:
@ -894,6 +924,9 @@ class RingBuilder(object):
dev['parts_wanted'] += 1
dev['parts'] -= 1
reassign_parts[part].append(replica)
self.logger.debug(
"Gathered %d/%d from dev %d [weight]",
part, replica, dev['id'])
reassign_parts.update(spread_out_parts)
reassign_parts.update(removed_dev_parts)
@ -1121,6 +1154,8 @@ class RingBuilder(object):
new_index, new_last_sort_key)
self._replica2part2dev[replica][part] = dev['id']
self.logger.debug(
"Placed %d/%d onto dev %d", part, replica, dev['id'])
# Just to save memory and keep from accidental reuse.
for dev in self._iter_devs():
View File
@ -243,7 +243,7 @@ class Ring(object):
if dev_id not in seen_ids:
part_nodes.append(self.devs[dev_id])
seen_ids.add(dev_id)
return part_nodes
return [dict(node, index=i) for i, node in enumerate(part_nodes)]
def get_part(self, account, container=None, obj=None):
"""
@ -291,6 +291,7 @@ class Ring(object):
====== ===============================================================
id unique integer identifier amongst devices
index offset into the primary node list for the partition
weight a float of the relative weight of this device as compared to
others; this indicates how many partitions the builder will try
to assign to this device
View File
@ -515,6 +515,9 @@ def build_dev_from_opts(opts):
(opts.replication_ip or opts.ip))
replication_port = opts.replication_port or opts.port
if not validate_device_name(opts.device):
raise ValueError('Invalid device name')
return {'region': opts.region, 'zone': opts.zone, 'ip': ip,
'port': opts.port, 'device': opts.device, 'meta': opts.meta,
'replication_ip': replication_ip,
@ -569,3 +572,10 @@ def get_tier_name(tier, builder):
device = builder.devs[tier[3]] or {}
return "r%sz%s-%s/%s" % (tier[0], tier[1], tier[2],
device.get('device', 'IDd%s' % tier[3]))
def validate_device_name(device_name):
return not (
device_name.startswith(' ') or
device_name.endswith(' ') or
len(device_name) == 0)
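A minimal usage sketch of the validator added above; the function body is repeated here only so the example runs on its own.

def validate_device_name(device_name):
    return not (
        device_name.startswith(' ') or
        device_name.endswith(' ') or
        len(device_name) == 0)

assert validate_device_name('sdb1')        # ordinary device name
assert not validate_device_name('')        # empty name rejected
assert not validate_device_name(' sdb1')   # leading space rejected
assert not validate_device_name('sdb1 ')   # trailing space rejected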


@ -17,10 +17,18 @@ import string
from swift.common.utils import config_true_value, SWIFT_CONF_FILE
from swift.common.ring import Ring
from swift.common.utils import quorum_size
from swift.common.exceptions import RingValidationError
from pyeclib.ec_iface import ECDriver, ECDriverError, VALID_EC_TYPES
LEGACY_POLICY_NAME = 'Policy-0'
VALID_CHARS = '-' + string.letters + string.digits
DEFAULT_POLICY_TYPE = REPL_POLICY = 'replication'
EC_POLICY = 'erasure_coding'
DEFAULT_EC_OBJECT_SEGMENT_SIZE = 1048576
class PolicyError(ValueError):
@ -38,36 +46,73 @@ def _get_policy_string(base, policy_index):
return return_string
def get_policy_string(base, policy_index):
def get_policy_string(base, policy_or_index):
"""
Helper function to construct a string from a base and the policy
index. Used to encode the policy index into either a file name
or a directory name by various modules.
Helper function to construct a string from a base and the policy.
Used to encode the policy index into either a file name or a
directory name by various modules.
:param base: the base string
:param policy_index: the storage policy index
:param policy_or_index: StoragePolicy instance, or an index
(string or int); if None, the legacy
storage policy (Policy-0) is assumed.
:returns: base name with policy index added
:raises: PolicyError if no policy exists with the given policy_index
"""
if POLICIES.get_by_index(policy_index) is None:
raise PolicyError("No policy with index %r" % policy_index)
return _get_policy_string(base, policy_index)
if isinstance(policy_or_index, BaseStoragePolicy):
policy = policy_or_index
else:
policy = POLICIES.get_by_index(policy_or_index)
if policy is None:
raise PolicyError("Unknown policy", index=policy_or_index)
return _get_policy_string(base, int(policy))
class StoragePolicy(object):
def split_policy_string(policy_string):
"""
Represents a storage policy.
Not meant to be instantiated directly; use
:func:`~swift.common.storage_policy.reload_storage_policies` to load
POLICIES from ``swift.conf``.
Helper function to convert a string representing a base and a
policy. Used to decode the policy from either a file name or
a directory name by various modules.
:param policy_string: base name with policy index added
:raises: PolicyError if given index does not map to a valid policy
:returns: a tuple, in the form (base, policy) where base is the base
string and policy is the StoragePolicy instance for the
index encoded in the policy_string.
"""
if '-' in policy_string:
base, policy_index = policy_string.rsplit('-', 1)
else:
base, policy_index = policy_string, None
policy = POLICIES.get_by_index(policy_index)
if get_policy_string(base, policy) != policy_string:
raise PolicyError("Unknown policy", index=policy_index)
return base, policy
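A simplified, standalone sketch of the split behaviour described above, with the POLICIES lookup and round-trip check omitted; bare names map to the legacy policy (index None).

def _split(policy_string):
    if '-' in policy_string:
        return tuple(policy_string.rsplit('-', 1))
    return policy_string, None

assert _split('objects-1') == ('objects', '1')      # policy index 1
assert _split('objects') == ('objects', None)       # legacy Policy-0
assert _split('async_pending-2') == ('async_pending', '2')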
class BaseStoragePolicy(object):
"""
Represents a storage policy. Not meant to be instantiated directly;
implement a derived subclass (e.g. StoragePolicy, ECStoragePolicy, etc.)
or use :func:`~swift.common.storage_policy.reload_storage_policies` to
load POLICIES from ``swift.conf``.
The object_ring property is lazy loaded once the service's ``swift_dir``
is known via :meth:`~StoragePolicyCollection.get_object_ring`, but it may
be overridden via the object_ring kwarg at create time for testing or
actively loaded with :meth:`~StoragePolicy.load_ring`.
"""
policy_type_to_policy_cls = {}
def __init__(self, idx, name='', is_default=False, is_deprecated=False,
object_ring=None):
# do not allow BaseStoragePolicy class to be instantiated directly
if type(self) == BaseStoragePolicy:
raise TypeError("Can't instantiate BaseStoragePolicy directly")
# policy parameter validation
try:
self.idx = int(idx)
except ValueError:
@ -88,6 +133,8 @@ class StoragePolicy(object):
self.name = name
self.is_deprecated = config_true_value(is_deprecated)
self.is_default = config_true_value(is_default)
if self.policy_type not in BaseStoragePolicy.policy_type_to_policy_cls:
raise PolicyError('Invalid type', self.policy_type)
if self.is_deprecated and self.is_default:
raise PolicyError('Deprecated policy can not be default. '
'Invalid config', self.idx)
@ -101,8 +148,80 @@ class StoragePolicy(object):
return cmp(self.idx, int(other))
def __repr__(self):
return ("StoragePolicy(%d, %r, is_default=%s, is_deprecated=%s)") % (
self.idx, self.name, self.is_default, self.is_deprecated)
return ("%s(%d, %r, is_default=%s, "
"is_deprecated=%s, policy_type=%r)") % \
(self.__class__.__name__, self.idx, self.name,
self.is_default, self.is_deprecated, self.policy_type)
@classmethod
def register(cls, policy_type):
"""
Decorator for Storage Policy implementations to register
their StoragePolicy class. This will also set the policy_type
attribute on the registered implementation.
"""
def register_wrapper(policy_cls):
if policy_type in cls.policy_type_to_policy_cls:
raise PolicyError(
'%r is already registered for the policy_type %r' % (
cls.policy_type_to_policy_cls[policy_type],
policy_type))
cls.policy_type_to_policy_cls[policy_type] = policy_cls
policy_cls.policy_type = policy_type
return policy_cls
return register_wrapper
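A hypothetical illustration of the decorator above; the 'example' policy type and ExampleStoragePolicy class are made up for this sketch and are not part of the patch.

from swift.common.storage_policy import BaseStoragePolicy

@BaseStoragePolicy.register('example')
class ExampleStoragePolicy(BaseStoragePolicy):
    @property
    def quorum(self):
        return 1   # made-up quorum rule, purely illustrative

assert BaseStoragePolicy.policy_type_to_policy_cls['example'] is \
    ExampleStoragePolicy
assert ExampleStoragePolicy.policy_type == 'example'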
@classmethod
def _config_options_map(cls):
"""
Map config option name to StoragePolicy parameter name.
"""
return {
'name': 'name',
'policy_type': 'policy_type',
'default': 'is_default',
'deprecated': 'is_deprecated',
}
@classmethod
def from_config(cls, policy_index, options):
config_to_policy_option_map = cls._config_options_map()
policy_options = {}
for config_option, value in options.items():
try:
policy_option = config_to_policy_option_map[config_option]
except KeyError:
raise PolicyError('Invalid option %r in '
'storage-policy section' % config_option,
index=policy_index)
policy_options[policy_option] = value
return cls(policy_index, **policy_options)
def get_info(self, config=False):
"""
Return the info dict and conf file options for this policy.
:param config: boolean, if True all config options are returned
"""
info = {}
for config_option, policy_attribute in \
self._config_options_map().items():
info[config_option] = getattr(self, policy_attribute)
if not config:
# remove some options for public consumption
if not self.is_default:
info.pop('default')
if not self.is_deprecated:
info.pop('deprecated')
info.pop('policy_type')
return info
def _validate_ring(self):
"""
Hook, called when the ring is loaded. Can be used to
validate the ring against the StoragePolicy configuration.
"""
pass
def load_ring(self, swift_dir):
"""
@ -114,6 +233,225 @@ class StoragePolicy(object):
return
self.object_ring = Ring(swift_dir, ring_name=self.ring_name)
# Validate ring to make sure it conforms to policy requirements
self._validate_ring()
@property
def quorum(self):
"""
Number of successful backend requests needed for the proxy to
consider the client request successful.
"""
raise NotImplementedError()
@BaseStoragePolicy.register(REPL_POLICY)
class StoragePolicy(BaseStoragePolicy):
"""
Represents a storage policy of type 'replication'. Default storage policy
class unless otherwise overridden from swift.conf.
Not meant to be instantiated directly; use
:func:`~swift.common.storage_policy.reload_storage_policies` to load
POLICIES from ``swift.conf``.
"""
@property
def quorum(self):
"""
Quorum concept in the replication case:
floor(number of replica / 2) + 1
"""
if not self.object_ring:
raise PolicyError('Ring is not loaded')
return quorum_size(self.object_ring.replica_count)
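A worked sketch of the floor(replicas / 2) + 1 rule quoted in the docstring; quorum_size below is a local stand-in for swift.common.utils.quorum_size.

def quorum_size(n):
    return (n // 2) + 1

assert quorum_size(3) == 2   # common 3-replica ring
assert quorum_size(4) == 3
assert quorum_size(5) == 3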
@BaseStoragePolicy.register(EC_POLICY)
class ECStoragePolicy(BaseStoragePolicy):
"""
Represents a storage policy of type 'erasure_coding'.
Not meant to be instantiated directly; use
:func:`~swift.common.storage_policy.reload_storage_policies` to load
POLICIES from ``swift.conf``.
"""
def __init__(self, idx, name='', is_default=False,
is_deprecated=False, object_ring=None,
ec_segment_size=DEFAULT_EC_OBJECT_SEGMENT_SIZE,
ec_type=None, ec_ndata=None, ec_nparity=None):
super(ECStoragePolicy, self).__init__(
idx, name, is_default, is_deprecated, object_ring)
# Validate erasure_coding policy specific members
# ec_type is one of the EC implementations supported by PyEClib
if ec_type is None:
raise PolicyError('Missing ec_type')
if ec_type not in VALID_EC_TYPES:
raise PolicyError('Wrong ec_type %s for policy %s, should be one'
' of "%s"' % (ec_type, self.name,
', '.join(VALID_EC_TYPES)))
self._ec_type = ec_type
# Define _ec_ndata as the number of EC data fragments
# Accessible as the property "ec_ndata"
try:
value = int(ec_ndata)
if value <= 0:
raise ValueError
self._ec_ndata = value
except (TypeError, ValueError):
raise PolicyError('Invalid ec_num_data_fragments %r' %
ec_ndata, index=self.idx)
# Define _ec_nparity as the number of EC parity fragments
# Accessible as the property "ec_nparity"
try:
value = int(ec_nparity)
if value <= 0:
raise ValueError
self._ec_nparity = value
except (TypeError, ValueError):
raise PolicyError('Invalid ec_num_parity_fragments %r'
% ec_nparity, index=self.idx)
# Define _ec_segment_size as the encode segment unit size
# Accessible as the property "ec_segment_size"
try:
value = int(ec_segment_size)
if value <= 0:
raise ValueError
self._ec_segment_size = value
except (TypeError, ValueError):
raise PolicyError('Invalid ec_object_segment_size %r' %
ec_segment_size, index=self.idx)
# Initialize PyECLib EC backend
try:
self.pyeclib_driver = \
ECDriver(k=self._ec_ndata, m=self._ec_nparity,
ec_type=self._ec_type)
except ECDriverError as e:
raise PolicyError("Error creating EC policy (%s)" % e,
index=self.idx)
# quorum size in the EC case depends on the choice of EC scheme.
self._ec_quorum_size = \
self._ec_ndata + self.pyeclib_driver.min_parity_fragments_needed()
@property
def ec_type(self):
return self._ec_type
@property
def ec_ndata(self):
return self._ec_ndata
@property
def ec_nparity(self):
return self._ec_nparity
@property
def ec_segment_size(self):
return self._ec_segment_size
@property
def fragment_size(self):
"""
Maximum length of a fragment, including header.
NB: a fragment archive is a sequence of 0 or more max-length
fragments followed by one possibly-shorter fragment.
"""
# Technically pyeclib's get_segment_info signature calls for
# (data_len, segment_size) but on a ranged GET we don't know the
# ec-content-length header before we need to compute where in the
# object we should request to align with the fragment size. So we
# tell pyeclib a lie - from its perspective, as long as data_len >=
# segment_size it'll give us the answer we want. From our
# perspective, because we only use this answer to calculate the
# *minimum* size we should read from an object body, even if data_len <
# segment_size we'll still only read *the whole one and only last
# fragment* and pass that into pyeclib, which will know what to do with
# it just as it always does when the last fragment is < fragment_size.
return self.pyeclib_driver.get_segment_info(
self.ec_segment_size, self.ec_segment_size)['fragment_size']
@property
def ec_scheme_description(self):
"""
This shorthand form of the important parts of the EC scheme is stored
in Object System Metadata on the EC Fragment Archives for debugging.
"""
return "%s %d+%d" % (self._ec_type, self._ec_ndata, self._ec_nparity)
def __repr__(self):
return ("%s, EC config(ec_type=%s, ec_segment_size=%d, "
"ec_ndata=%d, ec_nparity=%d)") % (
super(ECStoragePolicy, self).__repr__(), self.ec_type,
self.ec_segment_size, self.ec_ndata, self.ec_nparity)
@classmethod
def _config_options_map(cls):
options = super(ECStoragePolicy, cls)._config_options_map()
options.update({
'ec_type': 'ec_type',
'ec_object_segment_size': 'ec_segment_size',
'ec_num_data_fragments': 'ec_ndata',
'ec_num_parity_fragments': 'ec_nparity',
})
return options
def get_info(self, config=False):
info = super(ECStoragePolicy, self).get_info(config=config)
if not config:
info.pop('ec_object_segment_size')
info.pop('ec_num_data_fragments')
info.pop('ec_num_parity_fragments')
info.pop('ec_type')
return info
def _validate_ring(self):
"""
EC specific validation
Replica count check - we need _at_least_ (#data + #parity) replicas
configured. Also, if the replica count is larger than exactly that
number, there's a non-zero risk of error for code that is considering
the number of nodes in the primary list from the ring.
"""
if not self.object_ring:
raise PolicyError('Ring is not loaded')
nodes_configured = self.object_ring.replica_count
if nodes_configured != (self.ec_ndata + self.ec_nparity):
raise RingValidationError(
'EC ring for policy %s needs to be configured with '
'exactly %d nodes. Got %d.' % (self.name,
self.ec_ndata + self.ec_nparity, nodes_configured))
@property
def quorum(self):
"""
Number of successful backend requests needed for the proxy to consider
the client request successful.
The quorum size for EC policies defines the minimum number
of data + parity elements required to be able to guarantee
the desired fault tolerance, which is the number of data
elements supplemented by the minimum number of parity
elements required by the chosen erasure coding scheme.
For example, for Reed-Solomon, the minimum number of parity
elements required is 1, and thus the quorum_size requirement
is ec_ndata + 1.
Given the number of parity elements required is not the same
for every erasure coding scheme, consult PyECLib for
min_parity_fragments_needed()
"""
return self._ec_quorum_size
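A worked example of the EC quorum arithmetic described above, assuming a 10+4 scheme for which pyeclib reports that a single parity fragment is the minimum needed.

ec_ndata, ec_nparity = 10, 4
min_parity_fragments_needed = 1              # assumed pyeclib answer
ec_quorum_size = ec_ndata + min_parity_fragments_needed
assert ec_quorum_size == 11                  # 11 of the 14 fragment archives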
class StoragePolicyCollection(object):
"""
@ -230,9 +568,19 @@ class StoragePolicyCollection(object):
:returns: storage policy, or None if no such policy
"""
# makes it easier for callers to just pass in a header value
index = int(index) if index else 0
if index in ('', None):
index = 0
else:
try:
index = int(index)
except ValueError:
return None
return self.by_index.get(index)
@property
def legacy(self):
return self.get_by_index(None)
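A standalone sketch of the index handling added above: blank or missing values fall back to the legacy index 0, and non-numeric values now return None instead of raising.

def _coerce_index(index):
    if index in ('', None):
        return 0
    try:
        return int(index)
    except ValueError:
        return None

assert _coerce_index(None) == 0     # legacy / missing header
assert _coerce_index('') == 0
assert _coerce_index('2') == 2
assert _coerce_index('junk') is None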
def get_object_ring(self, policy_idx, swift_dir):
"""
Get the ring object to use to handle a request based on its policy.
@ -261,10 +609,7 @@ class StoragePolicyCollection(object):
# delete from /info if deprecated
if pol.is_deprecated:
continue
policy_entry = {}
policy_entry['name'] = pol.name
if pol.is_default:
policy_entry['default'] = pol.is_default
policy_entry = pol.get_info()
policy_info.append(policy_entry)
return policy_info
@ -281,22 +626,10 @@ def parse_storage_policies(conf):
if not section.startswith('storage-policy:'):
continue
policy_index = section.split(':', 1)[1]
# map config option name to StoragePolicy parameter name
config_to_policy_option_map = {
'name': 'name',
'default': 'is_default',
'deprecated': 'is_deprecated',
}
policy_options = {}
for config_option, value in conf.items(section):
try:
policy_option = config_to_policy_option_map[config_option]
except KeyError:
raise PolicyError('Invalid option %r in '
'storage-policy section %r' % (
config_option, section))
policy_options[policy_option] = value
policy = StoragePolicy(policy_index, **policy_options)
config_options = dict(conf.items(section))
policy_type = config_options.pop('policy_type', DEFAULT_POLICY_TYPE)
policy_cls = BaseStoragePolicy.policy_type_to_policy_cls[policy_type]
policy = policy_cls.from_config(policy_index, config_options)
policies.append(policy)
return StoragePolicyCollection(policies)
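A standalone sketch of the new routing step: the policy_type option (defaulting to 'replication') selects the registered implementation class. The registry dict and Fake* classes below are stand-ins for BaseStoragePolicy.policy_type_to_policy_cls and the real policy classes.

class FakeReplPolicy(object):
    pass

class FakeECPolicy(object):
    pass

registry = {'replication': FakeReplPolicy, 'erasure_coding': FakeECPolicy}
options = {'name': 'ec10-4', 'policy_type': 'erasure_coding'}
policy_type = options.pop('policy_type', 'replication')
assert registry[policy_type] is FakeECPolicy
assert 'policy_type' not in options   # remaining options go to from_config()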


@ -36,7 +36,7 @@ needs to change.
"""
from collections import defaultdict
from cStringIO import StringIO
from StringIO import StringIO
import UserDict
import time
from functools import partial
@ -128,6 +128,20 @@ class _UTC(tzinfo):
UTC = _UTC()
class WsgiStringIO(StringIO):
"""
This class adds support for the additional wsgi.input methods defined on
eventlet.wsgi.Input to the StringIO class, which would otherwise be a fine
stand-in for the file-like object in the WSGI environment.
"""
def set_hundred_continue_response_headers(self, headers):
pass
def send_hundred_continue_response(self):
pass
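A minimal sketch of why the subclass exists: code that expects wsgi.input to behave like eventlet.wsgi.Input can call the 100-continue hooks without an AttributeError.

from StringIO import StringIO

class WsgiStringIO(StringIO):
    def set_hundred_continue_response_headers(self, headers):
        pass

    def send_hundred_continue_response(self):
        pass

body = WsgiStringIO('hello')
body.set_hundred_continue_response_headers([])   # no-op, but no crash
assert body.read() == 'hello'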
def _datetime_property(header):
"""
Set and retrieve the datetime value of self.headers[header]
@ -743,16 +757,16 @@ def _req_environ_property(environ_field):
def _req_body_property():
"""
Set and retrieve the Request.body parameter. It consumes wsgi.input and
returns the results. On assignment, uses a StringIO to create a new
returns the results. On assignment, uses a WsgiStringIO to create a new
wsgi.input.
"""
def getter(self):
body = self.environ['wsgi.input'].read()
self.environ['wsgi.input'] = StringIO(body)
self.environ['wsgi.input'] = WsgiStringIO(body)
return body
def setter(self, value):
self.environ['wsgi.input'] = StringIO(value)
self.environ['wsgi.input'] = WsgiStringIO(value)
self.environ['CONTENT_LENGTH'] = str(len(value))
return property(getter, setter, doc="Get and set the request body str")
@ -820,7 +834,7 @@ class Request(object):
:param path: encoded, parsed, and unquoted into PATH_INFO
:param environ: WSGI environ dictionary
:param headers: HTTP headers
:param body: stuffed in a StringIO and hung on wsgi.input
:param body: stuffed in a WsgiStringIO and hung on wsgi.input
:param kwargs: any environ key with an property setter
"""
headers = headers or {}
@ -855,10 +869,10 @@ class Request(object):
}
env.update(environ)
if body is not None:
env['wsgi.input'] = StringIO(body)
env['wsgi.input'] = WsgiStringIO(body)
env['CONTENT_LENGTH'] = str(len(body))
elif 'wsgi.input' not in env:
env['wsgi.input'] = StringIO('')
env['wsgi.input'] = WsgiStringIO('')
req = Request(env)
for key, val in headers.iteritems():
req.headers[key] = val
@ -928,6 +942,10 @@ class Request(object):
if entity_path is not None:
return '/' + entity_path
@property
def is_chunked(self):
return 'chunked' in self.headers.get('transfer-encoding', '')
@property
def url(self):
"Provides the full url of the request"
@ -961,7 +979,7 @@ class Request(object):
env.update({
'REQUEST_METHOD': 'GET',
'CONTENT_LENGTH': '0',
'wsgi.input': StringIO(''),
'wsgi.input': WsgiStringIO(''),
})
return Request(env)
@ -1098,10 +1116,12 @@ class Response(object):
app_iter = _resp_app_iter_property()
def __init__(self, body=None, status=200, headers=None, app_iter=None,
request=None, conditional_response=False, **kw):
request=None, conditional_response=False,
conditional_etag=None, **kw):
self.headers = HeaderKeyDict(
[('Content-Type', 'text/html; charset=UTF-8')])
self.conditional_response = conditional_response
self._conditional_etag = conditional_etag
self.request = request
self.body = body
self.app_iter = app_iter
@ -1127,6 +1147,26 @@ class Response(object):
if 'charset' in kw and 'content_type' in kw:
self.charset = kw['charset']
@property
def conditional_etag(self):
"""
The conditional_etag keyword argument for Response will allow the
conditional match value of an If-Match request to be compared to a
non-standard value.
This is available for Storage Policies that do not store the client
object data verbatim on the storage nodes, but still need to support
conditional requests.
It's most effectively used with X-Backend-Etag-Is-At, which names the
additional metadata key where the original ETag of the clear-form
client request data is stored.
"""
if self._conditional_etag is not None:
return self._conditional_etag
else:
return self.etag
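A standalone sketch of how the override feeds the If-Match comparison below: when a conditional_etag is supplied (e.g. pulled from the metadata key named by X-Backend-Etag-Is-At), it is what gets compared, not the response's own ETag.

def if_match_ok(if_match, response_etag, conditional_etag=None):
    etag = conditional_etag if conditional_etag is not None else response_etag
    return etag in if_match

assert if_match_ok(['abc'], 'abc')                    # plain object
assert not if_match_ok(['abc'], 'frag-etag')          # fragment etag differs
assert if_match_ok(['abc'], 'frag-etag', 'abc')       # override restores match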
def _prepare_for_ranges(self, ranges):
"""
Prepare the Response for multiple ranges.
@ -1157,15 +1197,16 @@ class Response(object):
return content_size, content_type
def _response_iter(self, app_iter, body):
etag = self.conditional_etag
if self.conditional_response and self.request:
if self.etag and self.request.if_none_match and \
self.etag in self.request.if_none_match:
if etag and self.request.if_none_match and \
etag in self.request.if_none_match:
self.status = 304
self.content_length = 0
return ['']
if self.etag and self.request.if_match and \
self.etag not in self.request.if_match:
if etag and self.request.if_match and \
etag not in self.request.if_match:
self.status = 412
self.content_length = 0
return ['']


@ -1062,19 +1062,21 @@ class NullLogger(object):
class LoggerFileObject(object):
def __init__(self, logger):
def __init__(self, logger, log_type='STDOUT'):
self.logger = logger
self.log_type = log_type
def write(self, value):
value = value.strip()
if value:
if 'Connection reset by peer' in value:
self.logger.error(_('STDOUT: Connection reset by peer'))
self.logger.error(
_('%s: Connection reset by peer'), self.log_type)
else:
self.logger.error(_('STDOUT: %s'), value)
self.logger.error(_('%s: %s'), self.log_type, value)
def writelines(self, values):
self.logger.error(_('STDOUT: %s'), '#012'.join(values))
self.logger.error(_('%s: %s'), self.log_type, '#012'.join(values))
def close(self):
pass
@ -1527,11 +1529,11 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None,
logger_hook(conf, name, log_to_console, log_route, fmt,
logger, adapted_logger)
except (AttributeError, ImportError):
print(
'Error calling custom handler [%s]' % hook,
file=sys.stderr)
print('Error calling custom handler [%s]' % hook,
file=sys.stderr)
except ValueError:
print('Invalid custom handler format [%s]' % hook, sys.stderr)
print('Invalid custom handler format [%s]' % hook,
file=sys.stderr)
# Python 2.6 has the undesirable property of keeping references to all log
# handlers around forever in logging._handlers and logging._handlerList.
@ -1641,7 +1643,7 @@ def capture_stdio(logger, **kwargs):
if kwargs.pop('capture_stdout', True):
sys.stdout = LoggerFileObject(logger)
if kwargs.pop('capture_stderr', True):
sys.stderr = LoggerFileObject(logger)
sys.stderr = LoggerFileObject(logger, 'STDERR')
def parse_options(parser=None, once=False, test_args=None):
@ -2234,11 +2236,16 @@ class GreenAsyncPile(object):
Correlating results with jobs (if necessary) is left to the caller.
"""
def __init__(self, size):
def __init__(self, size_or_pool):
"""
:param size: size pool of green threads to use
:param size_or_pool: thread pool size or a pool to use
"""
self._pool = GreenPool(size)
if isinstance(size_or_pool, GreenPool):
self._pool = size_or_pool
size = self._pool.size
else:
self._pool = GreenPool(size_or_pool)
size = size_or_pool
self._responses = eventlet.queue.LightQueue(size)
self._inflight = 0
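A short sketch of the two construction styles now accepted; sharing one pool lets several piles bound their combined concurrency. Assumes eventlet and swift.common.utils are importable.

from eventlet import GreenPool
from swift.common.utils import GreenAsyncPile

pile = GreenAsyncPile(10)            # builds its own GreenPool(10)

shared = GreenPool(10)
pile_a = GreenAsyncPile(shared)      # both piles draw from the same
pile_b = GreenAsyncPile(shared)      # ten green threads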
@ -2644,6 +2651,10 @@ def public(func):
def quorum_size(n):
"""
quorum size as it applies to services that use 'replication' for data
integrity (Account/Container services). Object quorum_size is defined
on a storage policy basis.
Number of successful backend requests needed for the proxy to consider
the client request successful.
"""
@ -3137,6 +3148,26 @@ _rfc_extension_pattern = re.compile(
r'(?:\s*;\s*(' + _rfc_token + r")\s*(?:=\s*(" + _rfc_token +
r'|"(?:[^"\\]|\\.)*"))?)')
_content_range_pattern = re.compile(r'^bytes (\d+)-(\d+)/(\d+)$')
def parse_content_range(content_range):
"""
Parse a content-range header into (first_byte, last_byte, total_size).
See RFC 7233 section 4.2 for details on the header format, but it's
basically "Content-Range: bytes ${start}-${end}/${total}".
:param content_range: Content-Range header value to parse,
e.g. "bytes 100-1249/49004"
:returns: 3-tuple (start, end, total)
:raises: ValueError if malformed
"""
found = re.search(_content_range_pattern, content_range)
if not found:
raise ValueError("malformed Content-Range %r" % (content_range,))
return tuple(int(x) for x in found.groups())
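A usage sketch for the helper above, importing it from swift.common.utils.

from swift.common.utils import parse_content_range

assert parse_content_range('bytes 100-1249/49004') == (100, 1249, 49004)
try:
    parse_content_range('bytes */49004')   # '*' forms are not accepted here
except ValueError:
    pass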
def parse_content_type(content_type):
"""
@ -3291,8 +3322,11 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096):
:raises: MimeInvalid if the document is malformed
"""
boundary = '--' + boundary
if wsgi_input.readline(len(boundary + '\r\n')).strip() != boundary:
raise swift.common.exceptions.MimeInvalid('invalid starting boundary')
blen = len(boundary) + 2 # \r\n
got = wsgi_input.readline(blen)
if got.strip() != boundary:
raise swift.common.exceptions.MimeInvalid(
'invalid starting boundary: wanted %r, got %r', (boundary, got))
boundary = '\r\n' + boundary
input_buffer = ''
done = False


@ -25,6 +25,7 @@ import time
import mimetools
from swift import gettext_ as _
from StringIO import StringIO
from textwrap import dedent
import eventlet
import eventlet.debug
@ -96,13 +97,34 @@ def _loadconfigdir(object_type, uri, path, name, relative_to, global_conf):
loadwsgi._loaders['config_dir'] = _loadconfigdir
class ConfigString(NamedConfigLoader):
"""
Wrap a raw config string up for paste.deploy.
If you give one of these to our loadcontext (e.g. give it to our
appconfig) we'll intercept it and get it routed to the right loader.
"""
def __init__(self, config_string):
self.contents = StringIO(dedent(config_string))
self.filename = "string"
defaults = {
'here': "string",
'__file__': "string",
}
self.parser = loadwsgi.NicerConfigParser("string", defaults=defaults)
self.parser.optionxform = str # Don't lower-case keys
self.parser.readfp(self.contents)
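A minimal sketch showing that a ConfigString parses like a config file, so code paths that take a config path can be handed an in-memory string instead; the section and option names are illustrative.

from swift.common.wsgi import ConfigString

conf = ConfigString("""
[DEFAULT]
swift_dir = /etc/swift

[pipeline:main]
pipeline = proxy-server
""")
assert conf.parser.get('pipeline:main', 'pipeline') == 'proxy-server'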
def wrap_conf_type(f):
"""
Wrap a function whose first argument is a paste.deploy style config uri,
such that you can pass it an un-adorned raw filesystem path and the config
directive (either config: or config_dir:) will be added automatically
based on the type of filesystem entity at the given path (either a file or
directory) before passing it through to the paste.deploy function.
such that you can pass it an un-adorned raw filesystem path (or config
string) and the config directive (either config:, config_dir:, or
config_str:) will be added automatically based on the type of entity
(either a file or directory, or if no such entity on the file system -
just a string) before passing it through to the paste.deploy function.
"""
def wrapper(conf_path, *args, **kwargs):
if os.path.isdir(conf_path):
@ -332,6 +354,12 @@ class PipelineWrapper(object):
def loadcontext(object_type, uri, name=None, relative_to=None,
global_conf=None):
if isinstance(uri, loadwsgi.ConfigLoader):
# bypass loadcontext's uri parsing and loader routing and
# just directly return the context
if global_conf:
uri.update_defaults(global_conf, overwrite=False)
return uri.get_context(object_type, name, global_conf)
add_conf_type = wrap_conf_type(lambda x: x)
return loadwsgi.loadcontext(object_type, add_conf_type(uri), name=name,
relative_to=relative_to,


@ -158,6 +158,8 @@ class ContainerBroker(DatabaseBroker):
if not self.container:
raise ValueError(
'Attempting to create a new database with no container set')
if storage_policy_index is None:
storage_policy_index = 0
self.create_object_table(conn)
self.create_policy_stat_table(conn, storage_policy_index)
self.create_container_info_table(conn, put_timestamp,


@ -137,7 +137,7 @@ def get_reconciler_content_type(op):
raise ValueError('invalid operation type %r' % op)
def get_row_to_q_entry_translater(broker):
def get_row_to_q_entry_translator(broker):
account = broker.account
container = broker.container
op_type = {
@ -145,7 +145,7 @@ def get_row_to_q_entry_translater(broker):
1: get_reconciler_content_type('delete'),
}
def translater(obj_info):
def translator(obj_info):
name = get_reconciler_obj_name(obj_info['storage_policy_index'],
account, container,
obj_info['name'])
@ -157,7 +157,7 @@ def get_row_to_q_entry_translater(broker):
'content_type': op_type[obj_info['deleted']],
'size': 0,
}
return translater
return translator
def add_to_reconciler_queue(container_ring, account, container, obj,


@ -22,7 +22,7 @@ from eventlet import Timeout
from swift.container.backend import ContainerBroker, DATADIR
from swift.container.reconciler import (
MISPLACED_OBJECTS_ACCOUNT, incorrect_policy_index,
get_reconciler_container_name, get_row_to_q_entry_translater)
get_reconciler_container_name, get_row_to_q_entry_translator)
from swift.common import db_replicator
from swift.common.storage_policy import POLICIES
from swift.common.exceptions import DeviceUnavailable
@ -166,14 +166,14 @@ class ContainerReplicator(db_replicator.Replicator):
misplaced = broker.get_misplaced_since(point, self.per_diff)
if not misplaced:
return max_sync
translater = get_row_to_q_entry_translater(broker)
translator = get_row_to_q_entry_translator(broker)
errors = False
low_sync = point
while misplaced:
batches = defaultdict(list)
for item in misplaced:
container = get_reconciler_container_name(item['created_at'])
batches[container].append(translater(item))
batches[container].append(translator(item))
for container, item_list in batches.items():
success = self.feed_reconciler(container, item_list)
if not success:


@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import os
import uuid
from swift import gettext_ as _
@ -25,8 +26,8 @@ from eventlet import sleep, Timeout
import swift.common.db
from swift.container.backend import ContainerBroker, DATADIR
from swift.common.container_sync_realms import ContainerSyncRealms
from swift.common.direct_client import direct_get_object
from swift.common.internal_client import delete_object, put_object
from swift.common.internal_client import (
delete_object, put_object, InternalClient, UnexpectedResponse)
from swift.common.exceptions import ClientException
from swift.common.ring import Ring
from swift.common.ring.utils import is_local_device
@ -37,6 +38,55 @@ from swift.common.utils import (
from swift.common.daemon import Daemon
from swift.common.http import HTTP_UNAUTHORIZED, HTTP_NOT_FOUND
from swift.common.storage_policy import POLICIES
from swift.common.wsgi import ConfigString
# The default internal client config body is to support upgrades without
# requiring deployment of the new /etc/swift/internal-client.conf
ic_conf_body = """
[DEFAULT]
# swift_dir = /etc/swift
# user = swift
# You can specify default log routing here if you want:
# log_name = swift
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger
# log_custom_handlers =
#
# If set, log_udp_host will override log_address
# log_udp_host =
# log_udp_port = 514
#
# You can enable StatsD logging here:
# log_statsd_host = localhost
# log_statsd_port = 8125
# log_statsd_default_sample_rate = 1.0
# log_statsd_sample_rate_factor = 1.0
# log_statsd_metric_prefix =
[pipeline:main]
pipeline = catch_errors proxy-logging cache proxy-server
[app:proxy-server]
use = egg:swift#proxy
# See proxy-server.conf-sample for options
[filter:cache]
use = egg:swift#memcache
# See proxy-server.conf-sample for options
[filter:proxy-logging]
use = egg:swift#proxy_logging
[filter:catch_errors]
use = egg:swift#catch_errors
# See proxy-server.conf-sample for options
""".lstrip()
class ContainerSync(Daemon):
@ -103,12 +153,12 @@ class ContainerSync(Daemon):
loaded. This is overridden by unit tests.
"""
def __init__(self, conf, container_ring=None):
def __init__(self, conf, container_ring=None, logger=None):
#: The dict of configuration values from the [container-sync] section
#: of the container-server.conf.
self.conf = conf
#: Logger to use for container-sync log lines.
self.logger = get_logger(conf, log_route='container-sync')
self.logger = logger or get_logger(conf, log_route='container-sync')
#: Path to the local device mount points.
self.devices = conf.get('devices', '/srv/node')
#: Indicates whether mount points should be verified as actual mount
@ -158,6 +208,27 @@ class ContainerSync(Daemon):
self._myport = int(conf.get('bind_port', 6001))
swift.common.db.DB_PREALLOCATION = \
config_true_value(conf.get('db_preallocation', 'f'))
self.conn_timeout = float(conf.get('conn_timeout', 5))
request_tries = int(conf.get('request_tries') or 3)
internal_client_conf_path = conf.get('internal_client_conf_path')
if not internal_client_conf_path:
self.logger.warning(
_('Configuration option internal_client_conf_path not '
'defined. Using default configuration, see '
'internal-client.conf-sample for options'))
internal_client_conf = ConfigString(ic_conf_body)
else:
internal_client_conf = internal_client_conf_path
try:
self.swift = InternalClient(
internal_client_conf, 'Swift Container Sync', request_tries)
except IOError as err:
if err.errno != errno.ENOENT:
raise
raise SystemExit(
_('Unable to load internal client from config: %r (%s)') %
(internal_client_conf_path, err))
def get_object_ring(self, policy_idx):
"""
@ -361,7 +432,8 @@ class ContainerSync(Daemon):
headers['x-container-sync-key'] = user_key
delete_object(sync_to, name=row['name'], headers=headers,
proxy=self.select_http_proxy(),
logger=self.logger)
logger=self.logger,
timeout=self.conn_timeout)
except ClientException as err:
if err.http_status != HTTP_NOT_FOUND:
raise
@ -378,39 +450,32 @@ class ContainerSync(Daemon):
looking_for_timestamp = Timestamp(row['created_at'])
timestamp = -1
headers = body = None
headers_out = {'X-Backend-Storage-Policy-Index':
# look up for the newest one
headers_out = {'X-Newest': True,
'X-Backend-Storage-Policy-Index':
str(info['storage_policy_index'])}
for node in nodes:
try:
these_headers, this_body = direct_get_object(
node, part, info['account'], info['container'],
row['name'], headers=headers_out,
resp_chunk_size=65536)
this_timestamp = Timestamp(
these_headers['x-timestamp'])
if this_timestamp > timestamp:
timestamp = this_timestamp
headers = these_headers
body = this_body
except ClientException as err:
# If any errors are not 404, make sure we report the
# non-404 one. We don't want to mistakenly assume the
# object no longer exists just because one says so and
# the others errored for some other reason.
if not exc or getattr(
exc, 'http_status', HTTP_NOT_FOUND) == \
HTTP_NOT_FOUND:
exc = err
except (Exception, Timeout) as err:
exc = err
try:
source_obj_status, source_obj_info, source_obj_iter = \
self.swift.get_object(info['account'],
info['container'], row['name'],
headers=headers_out,
acceptable_statuses=(2, 4))
except (Exception, UnexpectedResponse, Timeout) as err:
source_obj_info = {}
source_obj_iter = None
exc = err
timestamp = Timestamp(source_obj_info.get(
'x-timestamp', 0))
headers = source_obj_info
body = source_obj_iter
if timestamp < looking_for_timestamp:
if exc:
raise exc
raise Exception(
_('Unknown exception trying to GET: %(node)r '
_('Unknown exception trying to GET: '
'%(account)r %(container)r %(object)r'),
{'node': node, 'part': part,
'account': info['account'],
{'account': info['account'],
'container': info['container'],
'object': row['name']})
for key in ('date', 'last-modified'):
@ -434,7 +499,8 @@ class ContainerSync(Daemon):
headers['x-container-sync-key'] = user_key
put_object(sync_to, name=row['name'], headers=headers,
contents=FileLikeIter(body),
proxy=self.select_http_proxy(), logger=self.logger)
proxy=self.select_http_proxy(), logger=self.logger,
timeout=self.conn_timeout)
self.container_puts += 1
self.logger.increment('puts')
self.logger.timing_since('puts.timing', start_time)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -57,6 +57,12 @@ class InMemoryFileSystem(object):
def get_diskfile(self, account, container, obj, **kwargs):
return DiskFile(self, account, container, obj)
def pickle_async_update(self, *args, **kwargs):
"""
For now don't handle async updates.
"""
pass
class DiskFileWriter(object):
"""
@ -98,6 +104,16 @@ class DiskFileWriter(object):
metadata['name'] = self._name
self._filesystem.put_object(self._name, self._fp, metadata)
def commit(self, timestamp):
"""
Perform any operations necessary to mark the object as durable. For
mem_diskfile type this is a no-op.
:param timestamp: object put timestamp, an instance of
:class:`~swift.common.utils.Timestamp`
"""
pass
class DiskFileReader(object):
"""


@ -15,15 +15,7 @@
""" In-Memory Object Server for Swift """
import os
from swift import gettext_ as _
from eventlet import Timeout
from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ConnectionTimeout
from swift.common.http import is_success
from swift.obj.mem_diskfile import InMemoryFileSystem
from swift.obj import server
@ -53,49 +45,6 @@ class ObjectController(server.ObjectController):
"""
return self._filesystem.get_diskfile(account, container, obj, **kwargs)
def async_update(self, op, account, container, obj, host, partition,
contdevice, headers_out, objdevice, policy_idx):
"""
Sends or saves an async update.
:param op: operation performed (ex: 'PUT', or 'DELETE')
:param account: account name for the object
:param container: container name for the object
:param obj: object name
:param host: host that the container is on
:param partition: partition that the container is on
:param contdevice: device name that the container is on
:param headers_out: dictionary of headers to send in the container
request
:param objdevice: device name that the object is in
:param policy_idx: the associated storage policy index
"""
headers_out['user-agent'] = 'object-server %s' % os.getpid()
full_path = '/%s/%s/%s' % (account, container, obj)
if all([host, partition, contdevice]):
try:
with ConnectionTimeout(self.conn_timeout):
ip, port = host.rsplit(':', 1)
conn = http_connect(ip, port, contdevice, partition, op,
full_path, headers_out)
with Timeout(self.node_timeout):
response = conn.getresponse()
response.read()
if is_success(response.status):
return
else:
self.logger.error(_(
'ERROR Container update failed: %(status)d '
'response from %(ip)s:%(port)s/%(dev)s'),
{'status': response.status, 'ip': ip, 'port': port,
'dev': contdevice})
except (Exception, Timeout):
self.logger.exception(_(
'ERROR container update failed with '
'%(ip)s:%(port)s/%(dev)s'),
{'ip': ip, 'port': port, 'dev': contdevice})
# FIXME: For now don't handle async updates
def REPLICATE(self, request):
"""
Handle REPLICATE requests for the Swift Object Server. This is used

swift/obj/reconstructor.py (new file)

@ -0,0 +1,927 @@
# Copyright (c) 2010-2015 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from os.path import join
import random
import time
import itertools
from collections import defaultdict
import cPickle as pickle
import shutil
from eventlet import (GreenPile, GreenPool, Timeout, sleep, hubs, tpool,
spawn)
from eventlet.support.greenlets import GreenletExit
from swift import gettext_ as _
from swift.common.utils import (
whataremyips, unlink_older_than, compute_eta, get_logger,
dump_recon_cache, ismount, mkdirs, config_true_value, list_from_csv,
get_hub, tpool_reraise, GreenAsyncPile, Timestamp, remove_file)
from swift.common.swob import HeaderKeyDict
from swift.common.bufferedhttp import http_connect
from swift.common.daemon import Daemon
from swift.common.ring.utils import is_local_device
from swift.obj.ssync_sender import Sender as ssync_sender
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
from swift.obj.diskfile import DiskFileRouter, get_data_dir, \
get_tmp_dir
from swift.common.storage_policy import POLICIES, EC_POLICY
from swift.common.exceptions import ConnectionTimeout, DiskFileError, \
SuffixSyncError
SYNC, REVERT = ('sync_only', 'sync_revert')
hubs.use_hub(get_hub())
def _get_partners(frag_index, part_nodes):
"""
Returns the left and right partners of the node whose index is
equal to the given frag_index.
:param frag_index: a fragment index
:param part_nodes: a list of primary nodes
:returns: [<node-to-left>, <node-to-right>]
"""
return [
part_nodes[(frag_index - 1) % len(part_nodes)],
part_nodes[(frag_index + 1) % len(part_nodes)],
]
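A worked example of the partner calculation above for a 14-node primary list (e.g. a 10+4 EC ring); the node dicts are trimmed to just their ring index, and the function body is repeated so the example runs on its own.

part_nodes = [{'index': i} for i in range(14)]

def _get_partners(frag_index, part_nodes):
    return [part_nodes[(frag_index - 1) % len(part_nodes)],
            part_nodes[(frag_index + 1) % len(part_nodes)]]

assert _get_partners(0, part_nodes) == [{'index': 13}, {'index': 1}]
assert _get_partners(13, part_nodes) == [{'index': 12}, {'index': 0}]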
class RebuildingECDiskFileStream(object):
"""
This class wraps the reconstructed fragment archive data and
metadata in the DiskFile interface for ssync.
"""
def __init__(self, metadata, frag_index, rebuilt_fragment_iter):
# start with metadata from a participating FA
self.metadata = metadata
# the new FA is going to have the same length as others in the set
self._content_length = self.metadata['Content-Length']
# update the FI and delete the ETag, the obj server will
# recalc on the other side...
self.metadata['X-Object-Sysmeta-Ec-Frag-Index'] = frag_index
for etag_key in ('ETag', 'Etag'):
self.metadata.pop(etag_key, None)
self.frag_index = frag_index
self.rebuilt_fragment_iter = rebuilt_fragment_iter
def get_metadata(self):
return self.metadata
@property
def content_length(self):
return self._content_length
def reader(self):
for chunk in self.rebuilt_fragment_iter:
yield chunk
class ObjectReconstructor(Daemon):
"""
Reconstruct objects using erasure code, and also rebalance EC Fragment
Archive objects off handoff nodes.
Encapsulates most logic and data needed by the object reconstruction
process. Each call to .reconstruct() performs one pass. It's up to the
caller to do this in a loop.
"""
def __init__(self, conf, logger=None):
"""
:param conf: configuration object obtained from ConfigParser
:param logger: logging object
"""
self.conf = conf
self.logger = logger or get_logger(
conf, log_route='object-reconstructor')
self.devices_dir = conf.get('devices', '/srv/node')
self.mount_check = config_true_value(conf.get('mount_check', 'true'))
self.swift_dir = conf.get('swift_dir', '/etc/swift')
self.port = int(conf.get('bind_port', 6000))
self.concurrency = int(conf.get('concurrency', 1))
self.stats_interval = int(conf.get('stats_interval', '300'))
self.ring_check_interval = int(conf.get('ring_check_interval', 15))
self.next_check = time.time() + self.ring_check_interval
self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
self.partition_times = []
self.run_pause = int(conf.get('run_pause', 30))
self.http_timeout = int(conf.get('http_timeout', 60))
self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, "object.recon")
# defaults subject to change after beta
self.conn_timeout = float(conf.get('conn_timeout', 0.5))
self.node_timeout = float(conf.get('node_timeout', 10))
self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
self.headers = {
'Content-Length': '0',
'user-agent': 'obj-reconstructor %s' % os.getpid()}
self.handoffs_first = config_true_value(conf.get('handoffs_first',
False))
self._df_router = DiskFileRouter(conf, self.logger)
def load_object_ring(self, policy):
"""
Make sure the policy's rings are loaded.
:param policy: the StoragePolicy instance
:returns: appropriate ring object
"""
policy.load_ring(self.swift_dir)
return policy.object_ring
def check_ring(self, object_ring):
"""
Check to see if the ring has been updated
:param object_ring: the ring to check
:returns: boolean indicating whether or not the ring has changed
"""
if time.time() > self.next_check:
self.next_check = time.time() + self.ring_check_interval
if object_ring.has_changed():
return False
return True
def _full_path(self, node, part, path, policy):
return '%(replication_ip)s:%(replication_port)s' \
'/%(device)s/%(part)s%(path)s ' \
'policy#%(policy)d frag#%(frag_index)s' % {
'replication_ip': node['replication_ip'],
'replication_port': node['replication_port'],
'device': node['device'],
'part': part, 'path': path,
'policy': policy,
'frag_index': node.get('index', 'handoff'),
}
def _get_response(self, node, part, path, headers, policy):
"""
Helper method for reconstruction that GETs a single EC fragment
archive
:param node: the node to GET from
:param part: the partition
:param path: full path of the desired EC archive
:param headers: the headers to send
:param policy: an instance of
:class:`~swift.common.storage_policy.BaseStoragePolicy`
:returns: response
"""
resp = None
headers['X-Backend-Node-Index'] = node['index']
try:
with ConnectionTimeout(self.conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'],
part, 'GET', path, headers=headers)
with Timeout(self.node_timeout):
resp = conn.getresponse()
if resp.status != HTTP_OK:
self.logger.warning(
_("Invalid response %(resp)s from %(full_path)s"),
{'resp': resp.status,
'full_path': self._full_path(node, part, path, policy)})
resp = None
except (Exception, Timeout):
self.logger.exception(
_("Trying to GET %(full_path)s"), {
'full_path': self._full_path(node, part, path, policy)})
return resp
def reconstruct_fa(self, job, node, metadata):
"""
Reconstructs a fragment archive - this method is called from ssync
after a remote node responds that it is missing this object - the local
diskfile is opened to provide metadata - but to reconstruct the
missing fragment archive we must connect to multiple object servers.
:param job: job from ssync_sender
:param node: node that we're rebuilding to
:param metadata: the metadata to attach to the rebuilt archive
:returns: a DiskFile like class for use by ssync
:raises DiskFileError: if the fragment archive cannot be reconstructed
"""
part_nodes = job['policy'].object_ring.get_part_nodes(
job['partition'])
part_nodes.remove(node)
# the fragment index we need to reconstruct is the position index
# of the node we're rebuilding to within the primary part list
fi_to_rebuild = node['index']
# KISS send out connection requests to all nodes, see what sticks
headers = {
'X-Backend-Storage-Policy-Index': int(job['policy']),
}
pile = GreenAsyncPile(len(part_nodes))
path = metadata['name']
for node in part_nodes:
pile.spawn(self._get_response, node, job['partition'],
path, headers, job['policy'])
responses = []
etag = None
for resp in pile:
if not resp:
continue
resp.headers = HeaderKeyDict(resp.getheaders())
responses.append(resp)
etag = sorted(responses, reverse=True,
key=lambda r: Timestamp(
r.headers.get('X-Backend-Timestamp')
))[0].headers.get('X-Object-Sysmeta-Ec-Etag')
responses = [r for r in responses if
r.headers.get('X-Object-Sysmeta-Ec-Etag') == etag]
if len(responses) >= job['policy'].ec_ndata:
break
else:
self.logger.error(
'Unable to get enough responses (%s/%s) '
'to reconstruct %s with ETag %s' % (
len(responses), job['policy'].ec_ndata,
self._full_path(node, job['partition'],
metadata['name'], job['policy']),
etag))
raise DiskFileError('Unable to reconstruct EC archive')
rebuilt_fragment_iter = self.make_rebuilt_fragment_iter(
responses[:job['policy'].ec_ndata], path, job['policy'],
fi_to_rebuild)
return RebuildingECDiskFileStream(metadata, fi_to_rebuild,
rebuilt_fragment_iter)
def _reconstruct(self, policy, fragment_payload, frag_index):
# XXX with jerasure this doesn't work if we need to rebuild a
# parity fragment, and not all data fragments are available
# segment = policy.pyeclib_driver.reconstruct(
# fragment_payload, [frag_index])[0]
# for safety until pyeclib 1.0.7 we'll just use decode and encode
segment = policy.pyeclib_driver.decode(fragment_payload)
return policy.pyeclib_driver.encode(segment)[frag_index]
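A hedged sketch of the decode-then-encode fallback described in the comment above, driving pyeclib directly; the 4+2 sizes and the jerasure_rs_vand backend are illustrative and assume that backend is installed.

from pyeclib.ec_iface import ECDriver

driver = ECDriver(k=4, m=2, ec_type='jerasure_rs_vand')
frags = driver.encode('some object segment bytes' * 100)
# pretend fragment index 5 (a parity fragment) needs rebuilding
segment = driver.decode(frags[:4])        # any k fragments will do
rebuilt = driver.encode(segment)[5]
assert rebuilt == frags[5]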
def make_rebuilt_fragment_iter(self, responses, path, policy, frag_index):
"""
Turn a set of connections from backend object servers into a generator
that yields up the rebuilt fragment archive for frag_index.
"""
def _get_one_fragment(resp):
buff = ''
remaining_bytes = policy.fragment_size
while remaining_bytes:
chunk = resp.read(remaining_bytes)
if not chunk:
break
remaining_bytes -= len(chunk)
buff += chunk
return buff
def fragment_payload_iter():
# We need a fragment from each connection, so it's best to
# use a GreenPile to keep them ordered and in sync
pile = GreenPile(len(responses))
while True:
for resp in responses:
pile.spawn(_get_one_fragment, resp)
try:
with Timeout(self.node_timeout):
fragment_payload = [fragment for fragment in pile]
except (Exception, Timeout):
self.logger.exception(
_("Error trying to rebuild %(path)s "
"policy#%(policy)d frag#%(frag_index)s"), {
'path': path,
'policy': policy,
'frag_index': frag_index,
})
break
if not all(fragment_payload):
break
rebuilt_fragment = self._reconstruct(
policy, fragment_payload, frag_index)
yield rebuilt_fragment
return fragment_payload_iter()
def stats_line(self):
"""
Logs various stats for the currently running reconstruction pass.
"""
if self.reconstruction_count:
elapsed = (time.time() - self.start) or 0.000001
rate = self.reconstruction_count / elapsed
self.logger.info(
_("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
" partitions reconstructed in %(time).2fs (%(rate).2f/sec, "
"%(remaining)s remaining)"),
{'reconstructed': self.reconstruction_count,
'total': self.job_count,
'percentage':
self.reconstruction_count * 100.0 / self.job_count,
'time': time.time() - self.start, 'rate': rate,
'remaining': '%d%s' % compute_eta(self.start,
self.reconstruction_count,
self.job_count)})
if self.suffix_count:
self.logger.info(
_("%(checked)d suffixes checked - "
"%(hashed).2f%% hashed, %(synced).2f%% synced"),
{'checked': self.suffix_count,
'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
'synced': (self.suffix_sync * 100.0) / self.suffix_count})
self.partition_times.sort()
self.logger.info(
_("Partition times: max %(max).4fs, "
"min %(min).4fs, med %(med).4fs"),
{'max': self.partition_times[-1],
'min': self.partition_times[0],
'med': self.partition_times[
len(self.partition_times) // 2]})
else:
self.logger.info(
_("Nothing reconstructed for %s seconds."),
(time.time() - self.start))
def kill_coros(self):
"""Utility function that kills all coroutines currently running."""
for coro in list(self.run_pool.coroutines_running):
try:
coro.kill(GreenletExit)
except GreenletExit:
pass
def heartbeat(self):
"""
Loop that runs in the background during reconstruction. It
periodically logs progress.
"""
while True:
sleep(self.stats_interval)
self.stats_line()
def detect_lockups(self):
"""
In testing, the pool.waitall() call very occasionally failed to return.
This is an attempt to make sure the reconstructor finishes its
reconstruction pass in some eventuality.
"""
while True:
sleep(self.lockup_timeout)
if self.reconstruction_count == self.last_reconstruction_count:
self.logger.error(_("Lockup detected.. killing live coros."))
self.kill_coros()
self.last_reconstruction_count = self.reconstruction_count
def _get_hashes(self, policy, path, recalculate=None, do_listdir=False):
df_mgr = self._df_router[policy]
hashed, suffix_hashes = tpool_reraise(
df_mgr._get_hashes, path, recalculate=recalculate,
do_listdir=do_listdir, reclaim_age=self.reclaim_age)
self.logger.update_stats('suffix.hashes', hashed)
return suffix_hashes
def get_suffix_delta(self, local_suff, local_index,
remote_suff, remote_index):
"""
Compare the local suffix hashes with the remote suffix hashes
for the given local and remote fragment indexes. Return those
suffixes which should be synced.
:param local_suff: the local suffix hashes (from _get_hashes)
:param local_index: the local fragment index for the job
:param remote_suff: the remote suffix hashes (from remote
REPLICATE request)
:param remote_index: the remote fragment index for the job
:returns: a list of strings, the suffix dirs to sync
"""
suffixes = []
for suffix, sub_dict_local in local_suff.iteritems():
sub_dict_remote = remote_suff.get(suffix, {})
if (sub_dict_local.get(None) != sub_dict_remote.get(None) or
sub_dict_local.get(local_index) !=
sub_dict_remote.get(remote_index)):
suffixes.append(suffix)
return suffixes
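A usage sketch for get_suffix_delta; the function body is repeated standalone so the example runs by itself. Suffix 'abc' differs on the compared fragment indexes, 'def' matches, so only 'abc' needs syncing.

def get_suffix_delta(local_suff, local_index, remote_suff, remote_index):
    suffixes = []
    for suffix, sub_dict_local in local_suff.iteritems():
        sub_dict_remote = remote_suff.get(suffix, {})
        if (sub_dict_local.get(None) != sub_dict_remote.get(None) or
                sub_dict_local.get(local_index) !=
                sub_dict_remote.get(remote_index)):
            suffixes.append(suffix)
    return suffixes

local_suff = {'abc': {None: 'ts1', 1: 'hashX'},
              'def': {None: 'ts2', 1: 'hashY'}}
remote_suff = {'abc': {None: 'ts1', 2: 'hashZ'},
               'def': {None: 'ts2', 2: 'hashY'}}
assert get_suffix_delta(local_suff, 1, remote_suff, 2) == ['abc']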
def rehash_remote(self, node, job, suffixes):
try:
with Timeout(self.http_timeout):
conn = http_connect(
node['replication_ip'], node['replication_port'],
node['device'], job['partition'], 'REPLICATE',
'/' + '-'.join(sorted(suffixes)),
headers=self.headers)
conn.getresponse().read()
except (Exception, Timeout):
self.logger.exception(
_("Trying to sync suffixes with %s") % self._full_path(
node, job['partition'], '', job['policy']))
def _get_suffixes_to_sync(self, job, node):
"""
For SYNC jobs we need to make a remote REPLICATE request to get
the remote node's current suffix's hashes and then compare to our
local suffix's hashes to decide which suffixes (if any) are out
of sync.
:param job: the job dict, with the keys defined in ``_get_part_jobs``
:param node: the remote node dict
:returns: a (possibly empty) list of strings, the suffixes to be
synced with the remote node.
"""
# get hashes from the remote node
remote_suffixes = None
try:
with Timeout(self.http_timeout):
resp = http_connect(
node['replication_ip'], node['replication_port'],
node['device'], job['partition'], 'REPLICATE',
'', headers=self.headers).getresponse()
if resp.status == HTTP_INSUFFICIENT_STORAGE:
self.logger.error(
_('%s responded as unmounted'),
self._full_path(node, job['partition'], '',
job['policy']))
elif resp.status != HTTP_OK:
self.logger.error(
_("Invalid response %(resp)s "
"from %(full_path)s"), {
'resp': resp.status,
'full_path': self._full_path(
node, job['partition'], '',
job['policy'])
})
else:
remote_suffixes = pickle.loads(resp.read())
except (Exception, Timeout):
# all exceptions are logged here so that our caller can
# safely catch our exception and continue to the next node
# without logging
self.logger.exception('Unable to get remote suffix hashes '
'from %r' % self._full_path(
node, job['partition'], '',
job['policy']))
if remote_suffixes is None:
raise SuffixSyncError('Unable to get remote suffix hashes')
suffixes = self.get_suffix_delta(job['hashes'],
job['frag_index'],
remote_suffixes,
node['index'])
# now recalculate local hashes for suffixes that don't
# match so we're comparing the latest
local_suff = self._get_hashes(job['policy'], job['path'],
recalculate=suffixes)
suffixes = self.get_suffix_delta(local_suff,
job['frag_index'],
remote_suffixes,
node['index'])
self.suffix_count += len(suffixes)
return suffixes
def delete_reverted_objs(self, job, objects, frag_index):
"""
For EC we can potentially revert only some of a partition
so we'll delete reverted objects here. Note that we delete
the fragment index of the file we sent to the remote node.
:param job: the job being processed
:param objects: a dict of objects to be deleted, each entry maps
hash=>timestamp
:param frag_index: (int) the fragment index of data files to be deleted
"""
df_mgr = self._df_router[job['policy']]
for object_hash, timestamp in objects.items():
try:
df = df_mgr.get_diskfile_from_hash(
job['local_dev']['device'], job['partition'],
object_hash, job['policy'],
frag_index=frag_index)
df.purge(Timestamp(timestamp), frag_index)
except DiskFileError:
continue
def process_job(self, job):
"""
Sync the local partition with the remote node(s) according to
the parameters of the job. For primary nodes, the SYNC job type
will define both left and right hand sync_to nodes to ssync with
as defined by this primary nodes index in the node list based on
the fragment index found in the partition. For non-primary
nodes (either handoff revert, or rebalance) the REVERT job will
define a single node in sync_to which is the proper/new home for
the fragment index.
N.B. Because ring rebalancing can be time consuming and handoff nodes'
fragment indexes do not have a stable order, it's possible to
have more than one REVERT job for a partition, and in some rare
failure conditions there may even also be a SYNC job for the
same partition - but each one will be processed separately
because each job will define a separate list of node(s) to
'sync_to'.
:param job: the job dict, with the keys defined in ``_get_job_info``
"""
self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
begin = time.time()
if job['job_type'] == REVERT:
self._revert(job, begin)
else:
self._sync(job, begin)
self.partition_times.append(time.time() - begin)
self.reconstruction_count += 1
def _sync(self, job, begin):
"""
Process a SYNC job.
"""
self.logger.increment(
'partition.update.count.%s' % (job['local_dev']['device'],))
# after our left and right partners, if there's some sort of
# failure we'll continue onto the remaining primary nodes and
# make sure they're in sync - or potentially rebuild missing
# fragments we find
dest_nodes = itertools.chain(
job['sync_to'],
# I think we could order these based on our index to better
# protect against a broken chain
itertools.ifilter(
lambda n: n['id'] not in (n['id'] for n in job['sync_to']),
job['policy'].object_ring.get_part_nodes(job['partition'])),
)
syncd_with = 0
for node in dest_nodes:
if syncd_with >= len(job['sync_to']):
# success!
break
try:
suffixes = self._get_suffixes_to_sync(job, node)
except SuffixSyncError:
continue
if not suffixes:
syncd_with += 1
continue
# ssync any out-of-sync suffixes with the remote node
success, _ = ssync_sender(
self, node, job, suffixes)()
# let the remote end know to rehash its suffixes
self.rehash_remote(node, job, suffixes)
# update stats for this attempt
self.suffix_sync += len(suffixes)
self.logger.update_stats('suffix.syncs', len(suffixes))
if success:
syncd_with += 1
self.logger.timing_since('partition.update.timing', begin)
def _revert(self, job, begin):
"""
Process a REVERT job.
"""
self.logger.increment(
'partition.delete.count.%s' % (job['local_dev']['device'],))
# we'd desperately like to push this partition back to its
# primary location, but if that node is down, the next best thing
# is one of the handoff locations - which *might* be us already!
dest_nodes = itertools.chain(
job['sync_to'],
job['policy'].object_ring.get_more_nodes(job['partition']),
)
syncd_with = 0
reverted_objs = {}
for node in dest_nodes:
if syncd_with >= len(job['sync_to']):
break
if node['id'] == job['local_dev']['id']:
# this is as good a place as any for this data for now
break
success, in_sync_objs = ssync_sender(
self, node, job, job['suffixes'])()
self.rehash_remote(node, job, job['suffixes'])
if success:
syncd_with += 1
reverted_objs.update(in_sync_objs)
if syncd_with >= len(job['sync_to']):
self.delete_reverted_objs(
job, reverted_objs, job['frag_index'])
self.logger.timing_since('partition.delete.timing', begin)
def _get_part_jobs(self, local_dev, part_path, partition, policy):
"""
Helper function to build jobs for a partition. This method will
read the suffix hashes and create job dictionaries to describe
the needed work. There will be one job for each fragment index
discovered in the partition.
For a fragment index which corresponds to this node's ring
index, a job with job_type SYNC will be created to ensure that
the left and right hand primary ring nodes for the part have the
corresponding left and right hand fragment archives.
For a fragment index (or an entire partition) for which this node
is not the corresponding primary node, job(s) with job_type REVERT
will be created to ensure that the fragment archives are pushed to
the correct node and removed from this one.
A partition may result in multiple jobs. Potentially many
REVERT jobs, and zero or one SYNC job.
:param local_dev: the local device
:param part_path: full path to partition
:param partition: partition number
:param policy: the policy
:returns: a list of dicts of job info
"""
# find all the fi's in the part, and which suffixes have them
hashes = self._get_hashes(policy, part_path, do_listdir=True)
non_data_fragment_suffixes = []
data_fi_to_suffixes = defaultdict(list)
for suffix, fi_hash in hashes.items():
if not fi_hash:
# this is for sanity and clarity; normally an empty
# suffix would get del'd from the hashes dict, but an
# OSError trying to re-hash the suffix could leave the
# value empty - the exception is logged, but there's
# no way to properly address this suffix at this time.
continue
data_frag_indexes = [f for f in fi_hash if f is not None]
if not data_frag_indexes:
non_data_fragment_suffixes.append(suffix)
else:
for fi in data_frag_indexes:
data_fi_to_suffixes[fi].append(suffix)
# helper to ensure consistent structure of jobs
def build_job(job_type, frag_index, suffixes, sync_to):
return {
'job_type': job_type,
'frag_index': frag_index,
'suffixes': suffixes,
'sync_to': sync_to,
'partition': partition,
'path': part_path,
'hashes': hashes,
'policy': policy,
'local_dev': local_dev,
# ssync likes to have it handy
'device': local_dev['device'],
}
# aggregate jobs for all the fragment indexes in this part
jobs = []
# check the primary nodes - to see if the part belongs here
part_nodes = policy.object_ring.get_part_nodes(partition)
for node in part_nodes:
if node['id'] == local_dev['id']:
# this partition belongs here, we'll need a sync job
frag_index = node['index']
try:
suffixes = data_fi_to_suffixes.pop(frag_index)
except KeyError:
suffixes = []
sync_job = build_job(
job_type=SYNC,
frag_index=frag_index,
suffixes=suffixes,
sync_to=_get_partners(frag_index, part_nodes),
)
# ssync callback to rebuild missing fragment_archives
sync_job['sync_diskfile_builder'] = self.reconstruct_fa
jobs.append(sync_job)
break
# assign remaining data fragment suffixes to revert jobs
ordered_fis = sorted((len(suffixes), fi) for fi, suffixes
in data_fi_to_suffixes.items())
for count, fi in ordered_fis:
revert_job = build_job(
job_type=REVERT,
frag_index=fi,
suffixes=data_fi_to_suffixes[fi],
sync_to=[part_nodes[fi]],
)
jobs.append(revert_job)
# now we need to assign suffixes that have no data fragments
if non_data_fragment_suffixes:
if jobs:
# the first job will be either the sync_job, or the
# revert_job for the fragment index that is most common
# among the suffixes
jobs[0]['suffixes'].extend(non_data_fragment_suffixes)
else:
# this is an unfortunate situation; we need a revert job to
# push partitions off this node, but none of the suffixes
# have any data fragments to hint at which node would be a
# good candidate to receive the tombstones.
jobs.append(build_job(
job_type=REVERT,
frag_index=None,
suffixes=non_data_fragment_suffixes,
# this is super safe
sync_to=part_nodes,
# something like this would probably be better
# sync_to=random.sample(part_nodes, 3),
))
# return a list of jobs for this part
return jobs
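A worked illustration (hypothetical layout): if this node's ring index for
the partition is 1 and the suffix hashes show data fragment archives for
indexes 1 and 3, the method returns one SYNC job and one REVERT job:
# data_fi_to_suffixes == {1: ['abc', 'def'], 3: ['def']}
# => jobs ~= [
#     {'job_type': SYNC, 'frag_index': 1, 'suffixes': ['abc', 'def'],
#      'sync_to': _get_partners(1, part_nodes), ...},
#     {'job_type': REVERT, 'frag_index': 3, 'suffixes': ['def'],
#      'sync_to': [part_nodes[3]], ...},
# ]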
def collect_parts(self, override_devices=None,
override_partitions=None):
"""
Helper for yielding partitions in the top level reconstructor
"""
override_devices = override_devices or []
override_partitions = override_partitions or []
ips = whataremyips()
for policy in POLICIES:
if policy.policy_type != EC_POLICY:
continue
self._diskfile_mgr = self._df_router[policy]
self.load_object_ring(policy)
data_dir = get_data_dir(policy)
local_devices = itertools.ifilter(
lambda dev: dev and is_local_device(
ips, self.port,
dev['replication_ip'], dev['replication_port']),
policy.object_ring.devs)
for local_dev in local_devices:
if override_devices and (local_dev['device'] not in
override_devices):
continue
dev_path = join(self.devices_dir, local_dev['device'])
obj_path = join(dev_path, data_dir)
tmp_path = join(dev_path, get_tmp_dir(int(policy)))
if self.mount_check and not ismount(dev_path):
self.logger.warn(_('%s is not mounted'),
local_dev['device'])
continue
unlink_older_than(tmp_path, time.time() -
self.reclaim_age)
if not os.path.exists(obj_path):
try:
mkdirs(obj_path)
except Exception:
self.logger.exception(
'Unable to create %s' % obj_path)
continue
try:
partitions = os.listdir(obj_path)
except OSError:
self.logger.exception(
'Unable to list partitions in %r' % obj_path)
continue
for partition in partitions:
part_path = join(obj_path, partition)
if not (partition.isdigit() and
os.path.isdir(part_path)):
self.logger.warning(
'Unexpected entity in data dir: %r' % part_path)
remove_file(part_path)
continue
partition = int(partition)
if override_partitions and (partition not in
override_partitions):
continue
part_info = {
'local_dev': local_dev,
'policy': policy,
'partition': partition,
'part_path': part_path,
}
yield part_info
def build_reconstruction_jobs(self, part_info):
"""
Helper function for collect_jobs to build jobs for reconstruction
using EC style storage policy
"""
jobs = self._get_part_jobs(**part_info)
random.shuffle(jobs)
if self.handoffs_first:
# Move the handoff revert jobs to the front of the list
jobs.sort(key=lambda job: job['job_type'], reverse=True)
self.job_count += len(jobs)
return jobs
def _reset_stats(self):
self.start = time.time()
self.job_count = 0
self.suffix_count = 0
self.suffix_sync = 0
self.suffix_hash = 0
self.reconstruction_count = 0
self.last_reconstruction_count = -1
def delete_partition(self, path):
self.logger.info(_("Removing partition: %s"), path)
tpool.execute(shutil.rmtree, path, ignore_errors=True)
def reconstruct(self, **kwargs):
"""Run a reconstruction pass"""
self._reset_stats()
self.partition_times = []
stats = spawn(self.heartbeat)
lockup_detector = spawn(self.detect_lockups)
sleep() # Give spawns a cycle
try:
self.run_pool = GreenPool(size=self.concurrency)
for part_info in self.collect_parts(**kwargs):
if not self.check_ring(part_info['policy'].object_ring):
self.logger.info(_("Ring change detected. Aborting "
"current reconstruction pass."))
return
jobs = self.build_reconstruction_jobs(part_info)
if not jobs:
# If this part belongs on this node, _get_part_jobs
# will *always* build a sync_job - even if there are
# no suffixes in the partition that need to sync.
# If there are any suffixes in the partition then our
# job list would have *at least* one revert job.
# Therefore we know this part a) doesn't belong on
# this node and b) doesn't have any suffixes in it.
self.run_pool.spawn(self.delete_partition,
part_info['part_path'])
for job in jobs:
self.run_pool.spawn(self.process_job, job)
with Timeout(self.lockup_timeout):
self.run_pool.waitall()
except (Exception, Timeout):
self.logger.exception(_("Exception in top-level"
"reconstruction loop"))
self.kill_coros()
finally:
stats.kill()
lockup_detector.kill()
self.stats_line()
def run_once(self, *args, **kwargs):
start = time.time()
self.logger.info(_("Running object reconstructor in script mode."))
override_devices = list_from_csv(kwargs.get('devices'))
override_partitions = [int(p) for p in
list_from_csv(kwargs.get('partitions'))]
self.reconstruct(
override_devices=override_devices,
override_partitions=override_partitions)
total = (time.time() - start) / 60
self.logger.info(
_("Object reconstruction complete (once). (%.02f minutes)"), total)
if not (override_partitions or override_devices):
dump_recon_cache({'object_reconstruction_time': total,
'object_reconstruction_last': time.time()},
self.rcache, self.logger)
def run_forever(self, *args, **kwargs):
self.logger.info(_("Starting object reconstructor in daemon mode."))
# Run the reconstructor continually
while True:
start = time.time()
self.logger.info(_("Starting object reconstruction pass."))
# Run the reconstructor
self.reconstruct()
total = (time.time() - start) / 60
self.logger.info(
_("Object reconstruction complete. (%.02f minutes)"), total)
dump_recon_cache({'object_reconstruction_time': total,
'object_reconstruction_last': time.time()},
self.rcache, self.logger)
self.logger.debug('reconstruction sleeping for %s seconds.',
self.run_pause)
sleep(self.run_pause)


@ -39,7 +39,7 @@ from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
from swift.obj import ssync_sender
from swift.obj.diskfile import (DiskFileManager, get_hashes, get_data_dir,
get_tmp_dir)
from swift.common.storage_policy import POLICIES
from swift.common.storage_policy import POLICIES, REPL_POLICY
hubs.use_hub(get_hub())
@ -110,14 +110,15 @@ class ObjectReplicator(Daemon):
"""
return self.sync_method(node, job, suffixes, *args, **kwargs)
def get_object_ring(self, policy_idx):
def load_object_ring(self, policy):
"""
Get the ring object to use to handle a request based on its policy.
Make sure the policy's rings are loaded.
:policy_idx: policy index as defined in swift.conf
:param policy: the StoragePolicy instance
:returns: appropriate ring object
"""
return POLICIES.get_object_ring(policy_idx, self.swift_dir)
policy.load_ring(self.swift_dir)
return policy.object_ring
def _rsync(self, args):
"""
@ -170,7 +171,7 @@ class ObjectReplicator(Daemon):
sync method in Swift.
"""
if not os.path.exists(job['path']):
return False, set()
return False, {}
args = [
'rsync',
'--recursive',
@ -195,11 +196,11 @@ class ObjectReplicator(Daemon):
args.append(spath)
had_any = True
if not had_any:
return False, set()
data_dir = get_data_dir(job['policy_idx'])
return False, {}
data_dir = get_data_dir(job['policy'])
args.append(join(rsync_module, node['device'],
data_dir, job['partition']))
return self._rsync(args) == 0, set()
return self._rsync(args) == 0, {}
def ssync(self, node, job, suffixes, remote_check_objs=None):
return ssync_sender.Sender(
@ -231,7 +232,7 @@ class ObjectReplicator(Daemon):
if len(suff) == 3 and isdir(join(path, suff))]
self.replication_count += 1
self.logger.increment('partition.delete.count.%s' % (job['device'],))
self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx']
self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
begin = time.time()
try:
responses = []
@ -245,8 +246,9 @@ class ObjectReplicator(Daemon):
self.conf.get('sync_method', 'rsync') == 'ssync':
kwargs['remote_check_objs'] = \
synced_remote_regions[node['region']]
# cand_objs is a list of objects for deletion
success, cand_objs = self.sync(
# candidates is a dict(hash=>timestamp) of objects
# for deletion
success, candidates = self.sync(
node, job, suffixes, **kwargs)
if success:
with Timeout(self.http_timeout):
@ -257,7 +259,8 @@ class ObjectReplicator(Daemon):
'/' + '-'.join(suffixes), headers=self.headers)
conn.getresponse().read()
if node['region'] != job['region']:
synced_remote_regions[node['region']] = cand_objs
synced_remote_regions[node['region']] = \
candidates.keys()
responses.append(success)
for region, cand_objs in synced_remote_regions.iteritems():
if delete_objs is None:
@ -314,7 +317,7 @@ class ObjectReplicator(Daemon):
"""
self.replication_count += 1
self.logger.increment('partition.update.count.%s' % (job['device'],))
self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx']
self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
begin = time.time()
try:
hashed, local_hash = tpool_reraise(
@ -328,7 +331,8 @@ class ObjectReplicator(Daemon):
random.shuffle(job['nodes'])
nodes = itertools.chain(
job['nodes'],
job['object_ring'].get_more_nodes(int(job['partition'])))
job['policy'].object_ring.get_more_nodes(
int(job['partition'])))
while attempts_left > 0:
# If this throws StopIteration it will be caught way below
node = next(nodes)
@ -460,16 +464,15 @@ class ObjectReplicator(Daemon):
self.kill_coros()
self.last_replication_count = self.replication_count
def process_repl(self, policy, ips, override_devices=None,
override_partitions=None):
def build_replication_jobs(self, policy, ips, override_devices=None,
override_partitions=None):
"""
Helper function for collect_jobs to build jobs for replication
using replication style storage policy
"""
jobs = []
obj_ring = self.get_object_ring(policy.idx)
data_dir = get_data_dir(policy.idx)
for local_dev in [dev for dev in obj_ring.devs
data_dir = get_data_dir(policy)
for local_dev in [dev for dev in policy.object_ring.devs
if (dev
and is_local_device(ips,
self.port,
@ -479,7 +482,7 @@ class ObjectReplicator(Daemon):
or dev['device'] in override_devices))]:
dev_path = join(self.devices_dir, local_dev['device'])
obj_path = join(dev_path, data_dir)
tmp_path = join(dev_path, get_tmp_dir(int(policy)))
tmp_path = join(dev_path, get_tmp_dir(policy))
if self.mount_check and not ismount(dev_path):
self.logger.warn(_('%s is not mounted'), local_dev['device'])
continue
@ -497,7 +500,8 @@ class ObjectReplicator(Daemon):
try:
job_path = join(obj_path, partition)
part_nodes = obj_ring.get_part_nodes(int(partition))
part_nodes = policy.object_ring.get_part_nodes(
int(partition))
nodes = [node for node in part_nodes
if node['id'] != local_dev['id']]
jobs.append(
@ -506,9 +510,8 @@ class ObjectReplicator(Daemon):
obj_path=obj_path,
nodes=nodes,
delete=len(nodes) > len(part_nodes) - 1,
policy_idx=policy.idx,
policy=policy,
partition=partition,
object_ring=obj_ring,
region=local_dev['region']))
except ValueError:
continue
@ -530,13 +533,15 @@ class ObjectReplicator(Daemon):
jobs = []
ips = whataremyips()
for policy in POLICIES:
if (override_policies is not None
and str(policy.idx) not in override_policies):
continue
# may need to branch here for future policy types
jobs += self.process_repl(policy, ips,
override_devices=override_devices,
override_partitions=override_partitions)
if policy.policy_type == REPL_POLICY:
if (override_policies is not None and
str(policy.idx) not in override_policies):
continue
# ensure rings are loaded for policy
self.load_object_ring(policy)
jobs += self.build_replication_jobs(
policy, ips, override_devices=override_devices,
override_partitions=override_partitions)
random.shuffle(jobs)
if self.handoffs_first:
# Move the handoff parts to the front of the list
@ -569,7 +574,7 @@ class ObjectReplicator(Daemon):
if self.mount_check and not ismount(dev_path):
self.logger.warn(_('%s is not mounted'), job['device'])
continue
if not self.check_ring(job['object_ring']):
if not self.check_ring(job['policy'].object_ring):
self.logger.info(_("Ring change detected. Aborting "
"current replication pass."))
return


@ -16,10 +16,12 @@
""" Object Server for Swift """
import cPickle as pickle
import json
import os
import multiprocessing
import time
import traceback
import rfc822
import socket
import math
from swift import gettext_ as _
@ -30,7 +32,7 @@ from eventlet import sleep, wsgi, Timeout
from swift.common.utils import public, get_logger, \
config_true_value, timing_stats, replication, \
normalize_delete_at_timestamp, get_log_line, Timestamp, \
get_expirer_container
get_expirer_container, iter_multipart_mime_documents
from swift.common.bufferedhttp import http_connect
from swift.common.constraints import check_object_creation, \
valid_timestamp, check_utf8
@ -48,8 +50,35 @@ from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPCreated, \
HTTPPreconditionFailed, HTTPRequestTimeout, HTTPUnprocessableEntity, \
HTTPClientDisconnect, HTTPMethodNotAllowed, Request, Response, \
HTTPInsufficientStorage, HTTPForbidden, HTTPException, HeaderKeyDict, \
HTTPConflict
from swift.obj.diskfile import DATAFILE_SYSTEM_META, DiskFileManager
HTTPConflict, HTTPServerError
from swift.obj.diskfile import DATAFILE_SYSTEM_META, DiskFileRouter
def iter_mime_headers_and_bodies(wsgi_input, mime_boundary, read_chunk_size):
mime_documents_iter = iter_multipart_mime_documents(
wsgi_input, mime_boundary, read_chunk_size)
for file_like in mime_documents_iter:
hdrs = HeaderKeyDict(rfc822.Message(file_like, 0))
yield (hdrs, file_like)
def drain(file_like, read_size, timeout):
"""
Read and discard any bytes from file_like.
:param file_like: file-like object to read from
:param read_size: how big a chunk to read at a time
:param timeout: how long to wait for a read (use None for no timeout)
:raises ChunkReadTimeout: if no chunk was read in time
"""
while True:
with ChunkReadTimeout(timeout):
chunk = file_like.read(read_size)
if not chunk:
break
class EventletPlungerString(str):
@ -142,7 +171,7 @@ class ObjectController(BaseStorageServer):
# Common on-disk hierarchy shared across account, container and object
# servers.
self._diskfile_mgr = DiskFileManager(conf, self.logger)
self._diskfile_router = DiskFileRouter(conf, self.logger)
# This is populated by global_conf_callback way below as the semaphore
# is shared by all workers.
if 'replication_semaphore' in conf:
@ -156,7 +185,7 @@ class ObjectController(BaseStorageServer):
conf.get('replication_failure_ratio') or 1.0)
def get_diskfile(self, device, partition, account, container, obj,
policy_idx, **kwargs):
policy, **kwargs):
"""
Utility method for instantiating a DiskFile object supporting a given
REST API.
@ -165,11 +194,11 @@ class ObjectController(BaseStorageServer):
DiskFile class would simply over-ride this method to provide that
behavior.
"""
return self._diskfile_mgr.get_diskfile(
device, partition, account, container, obj, policy_idx, **kwargs)
return self._diskfile_router[policy].get_diskfile(
device, partition, account, container, obj, policy, **kwargs)
def async_update(self, op, account, container, obj, host, partition,
contdevice, headers_out, objdevice, policy_index):
contdevice, headers_out, objdevice, policy):
"""
Sends or saves an async update.
@ -183,7 +212,7 @@ class ObjectController(BaseStorageServer):
:param headers_out: dictionary of headers to send in the container
request
:param objdevice: device name that the object is in
:param policy_index: the associated storage policy index
:param policy: the associated BaseStoragePolicy instance
"""
headers_out['user-agent'] = 'object-server %s' % os.getpid()
full_path = '/%s/%s/%s' % (account, container, obj)
@ -213,12 +242,11 @@ class ObjectController(BaseStorageServer):
data = {'op': op, 'account': account, 'container': container,
'obj': obj, 'headers': headers_out}
timestamp = headers_out['x-timestamp']
self._diskfile_mgr.pickle_async_update(objdevice, account, container,
obj, data, timestamp,
policy_index)
self._diskfile_router[policy].pickle_async_update(
objdevice, account, container, obj, data, timestamp, policy)
def container_update(self, op, account, container, obj, request,
headers_out, objdevice, policy_idx):
headers_out, objdevice, policy):
"""
Update the container when objects are updated.
@ -230,6 +258,7 @@ class ObjectController(BaseStorageServer):
:param headers_out: dictionary of headers to send in the container
request(s)
:param objdevice: device name that the object is in
:param policy: the BaseStoragePolicy instance
"""
headers_in = request.headers
conthosts = [h.strip() for h in
@ -255,14 +284,14 @@ class ObjectController(BaseStorageServer):
headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-')
headers_out['referer'] = request.as_referer()
headers_out['X-Backend-Storage-Policy-Index'] = policy_idx
headers_out['X-Backend-Storage-Policy-Index'] = int(policy)
for conthost, contdevice in updates:
self.async_update(op, account, container, obj, conthost,
contpartition, contdevice, headers_out,
objdevice, policy_idx)
objdevice, policy)
def delete_at_update(self, op, delete_at, account, container, obj,
request, objdevice, policy_index):
request, objdevice, policy):
"""
Update the expiring objects container when objects are updated.
@ -273,7 +302,7 @@ class ObjectController(BaseStorageServer):
:param obj: object name
:param request: the original request driving the update
:param objdevice: device name that the object is in
:param policy_index: the policy index to be used for tmp dir
:param policy: the BaseStoragePolicy instance (used for tmp dir)
"""
if config_true_value(
request.headers.get('x-backend-replication', 'f')):
@ -333,13 +362,66 @@ class ObjectController(BaseStorageServer):
op, self.expiring_objects_account, delete_at_container,
'%s-%s/%s/%s' % (delete_at, account, container, obj),
host, partition, contdevice, headers_out, objdevice,
policy_index)
policy)
def _make_timeout_reader(self, file_like):
def timeout_reader():
with ChunkReadTimeout(self.client_timeout):
return file_like.read(self.network_chunk_size)
return timeout_reader
def _read_put_commit_message(self, mime_documents_iter):
rcvd_commit = False
try:
with ChunkReadTimeout(self.client_timeout):
commit_hdrs, commit_iter = next(mime_documents_iter)
if commit_hdrs.get('X-Document', None) == "put commit":
rcvd_commit = True
drain(commit_iter, self.network_chunk_size, self.client_timeout)
except ChunkReadTimeout:
raise HTTPClientDisconnect()
except StopIteration:
raise HTTPBadRequest(body="couldn't find PUT commit MIME doc")
return rcvd_commit
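For reference, a hedged sketch of the commit document expected here; only
the X-Document header is inspected and the body is drained and discarded:
#   X-Document: put commit
#
#   <body ignored>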
def _read_metadata_footer(self, mime_documents_iter):
try:
with ChunkReadTimeout(self.client_timeout):
footer_hdrs, footer_iter = next(mime_documents_iter)
except ChunkReadTimeout:
raise HTTPClientDisconnect()
except StopIteration:
raise HTTPBadRequest(body="couldn't find footer MIME doc")
timeout_reader = self._make_timeout_reader(footer_iter)
try:
footer_body = ''.join(iter(timeout_reader, ''))
except ChunkReadTimeout:
raise HTTPClientDisconnect()
footer_md5 = footer_hdrs.get('Content-MD5')
if not footer_md5:
raise HTTPBadRequest(body="no Content-MD5 in footer")
if footer_md5 != md5(footer_body).hexdigest():
raise HTTPUnprocessableEntity(body="footer MD5 mismatch")
try:
return HeaderKeyDict(json.loads(footer_body))
except ValueError:
raise HTTPBadRequest("invalid JSON for footer doc")
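As a hedged sketch of the footer document parsed above: its MIME headers
carry a Content-MD5 of the body, and the body is a JSON object of headers
to merge (key names here are illustrative; this code only requires valid
JSON and a matching Content-MD5):
import json
from hashlib import md5
footer_body = json.dumps({
    'Etag': 'd41d8cd98f00b204e9800998ecf8427e',
    'X-Object-Sysmeta-Ec-Etag': 'd41d8cd98f00b204e9800998ecf8427e'})
footer_content_md5 = md5(footer_body).hexdigest()  # sent as Content-MD5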
def _check_container_override(self, update_headers, metadata):
for key, val in metadata.iteritems():
override_prefix = 'x-backend-container-update-override-'
if key.lower().startswith(override_prefix):
override = key.lower().replace(override_prefix, 'x-')
update_headers[override] = val
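A small example of the rewrite this helper performs on request or footer
metadata:
# X-Backend-Container-Update-Override-Etag: d41d8cd98f00b204e9800998ecf8427e
# becomes the container update header
# x-etag: d41d8cd98f00b204e9800998ecf8427e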
@public
@timing_stats()
def POST(self, request):
"""Handle HTTP POST requests for the Swift Object Server."""
device, partition, account, container, obj, policy_idx = \
device, partition, account, container, obj, policy = \
get_name_and_placement(request, 5, 5, True)
req_timestamp = valid_timestamp(request)
new_delete_at = int(request.headers.get('X-Delete-At') or 0)
@ -349,7 +431,7 @@ class ObjectController(BaseStorageServer):
try:
disk_file = self.get_diskfile(
device, partition, account, container, obj,
policy_idx=policy_idx)
policy=policy)
except DiskFileDeviceUnavailable:
return HTTPInsufficientStorage(drive=device, request=request)
try:
@ -374,11 +456,11 @@ class ObjectController(BaseStorageServer):
if orig_delete_at != new_delete_at:
if new_delete_at:
self.delete_at_update('PUT', new_delete_at, account, container,
obj, request, device, policy_idx)
obj, request, device, policy)
if orig_delete_at:
self.delete_at_update('DELETE', orig_delete_at, account,
container, obj, request, device,
policy_idx)
policy)
try:
disk_file.write_metadata(metadata)
except (DiskFileXattrNotSupported, DiskFileNoSpace):
@ -389,7 +471,7 @@ class ObjectController(BaseStorageServer):
@timing_stats()
def PUT(self, request):
"""Handle HTTP PUT requests for the Swift Object Server."""
device, partition, account, container, obj, policy_idx = \
device, partition, account, container, obj, policy = \
get_name_and_placement(request, 5, 5, True)
req_timestamp = valid_timestamp(request)
error_response = check_object_creation(request, obj)
@ -404,10 +486,22 @@ class ObjectController(BaseStorageServer):
except ValueError as e:
return HTTPBadRequest(body=str(e), request=request,
content_type='text/plain')
# In case of multipart-MIME put, the proxy sends a chunked request,
# but may let us know the real content length so we can verify that
# we have enough disk space to hold the object.
if fsize is None:
fsize = request.headers.get('X-Backend-Obj-Content-Length')
if fsize is not None:
try:
fsize = int(fsize)
except ValueError as e:
return HTTPBadRequest(body=str(e), request=request,
content_type='text/plain')
try:
disk_file = self.get_diskfile(
device, partition, account, container, obj,
policy_idx=policy_idx)
policy=policy)
except DiskFileDeviceUnavailable:
return HTTPInsufficientStorage(drive=device, request=request)
try:
@ -439,13 +533,51 @@ class ObjectController(BaseStorageServer):
with disk_file.create(size=fsize) as writer:
upload_size = 0
def timeout_reader():
with ChunkReadTimeout(self.client_timeout):
return request.environ['wsgi.input'].read(
self.network_chunk_size)
# If the proxy wants to send us object metadata after the
# object body, it sets some headers. We have to tell the
# proxy, in the 100 Continue response, that we're able to
# parse a multipart MIME document and extract the object and
# metadata from it. If we don't, then the proxy won't
# actually send the footer metadata.
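# Illustrative exchange using the headers handled just below: the proxy's
# PUT carries
#   X-Backend-Obj-Multiphase-Commit: yes
#   X-Backend-Obj-Metadata-Footer: yes
#   X-Backend-Obj-Multipart-Mime-Boundary: <boundary>
# and our 100 Continue response advertises
#   X-Obj-Multiphase-Commit: yes
#   X-Obj-Metadata-Footer: yes
# so the proxy knows it may send the MIME-framed body, footer and commit
# documents.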
have_metadata_footer = False
use_multiphase_commit = False
mime_documents_iter = iter([])
obj_input = request.environ['wsgi.input']
hundred_continue_headers = []
if config_true_value(
request.headers.get(
'X-Backend-Obj-Multiphase-Commit')):
use_multiphase_commit = True
hundred_continue_headers.append(
('X-Obj-Multiphase-Commit', 'yes'))
if config_true_value(
request.headers.get('X-Backend-Obj-Metadata-Footer')):
have_metadata_footer = True
hundred_continue_headers.append(
('X-Obj-Metadata-Footer', 'yes'))
if have_metadata_footer or use_multiphase_commit:
obj_input.set_hundred_continue_response_headers(
hundred_continue_headers)
mime_boundary = request.headers.get(
'X-Backend-Obj-Multipart-Mime-Boundary')
if not mime_boundary:
return HTTPBadRequest("no MIME boundary")
try:
with ChunkReadTimeout(self.client_timeout):
mime_documents_iter = iter_mime_headers_and_bodies(
request.environ['wsgi.input'],
mime_boundary, self.network_chunk_size)
_junk_hdrs, obj_input = next(mime_documents_iter)
except ChunkReadTimeout:
return HTTPRequestTimeout(request=request)
timeout_reader = self._make_timeout_reader(obj_input)
try:
for chunk in iter(lambda: timeout_reader(), ''):
for chunk in iter(timeout_reader, ''):
start_time = time.time()
if start_time > upload_expiration:
self.logger.increment('PUT.timeouts')
@ -461,9 +593,16 @@ class ObjectController(BaseStorageServer):
upload_size)
if fsize is not None and fsize != upload_size:
return HTTPClientDisconnect(request=request)
footer_meta = {}
if have_metadata_footer:
footer_meta = self._read_metadata_footer(
mime_documents_iter)
request_etag = (footer_meta.get('etag') or
request.headers.get('etag', '')).lower()
etag = etag.hexdigest()
if 'etag' in request.headers and \
request.headers['etag'].lower() != etag:
if request_etag and request_etag != etag:
return HTTPUnprocessableEntity(request=request)
metadata = {
'X-Timestamp': request.timestamp.internal,
@ -473,6 +612,8 @@ class ObjectController(BaseStorageServer):
}
metadata.update(val for val in request.headers.iteritems()
if is_sys_or_user_meta('object', val[0]))
metadata.update(val for val in footer_meta.iteritems()
if is_sys_or_user_meta('object', val[0]))
headers_to_copy = (
request.headers.get(
'X-Backend-Replication-Headers', '').split() +
@ -482,39 +623,63 @@ class ObjectController(BaseStorageServer):
header_caps = header_key.title()
metadata[header_caps] = request.headers[header_key]
writer.put(metadata)
# if the PUT requires a two-phase commit (a data and a commit
# phase) send the proxy server another 100-continue response
# to indicate that we are finished writing object data
if use_multiphase_commit:
request.environ['wsgi.input'].\
send_hundred_continue_response()
if not self._read_put_commit_message(mime_documents_iter):
return HTTPServerError(request=request)
# got 2nd phase confirmation, write a timestamp.durable
# state file to indicate a successful PUT
writer.commit(request.timestamp)
# Drain any remaining MIME docs from the socket. There
# shouldn't be any, but we must read the whole request body.
try:
while True:
with ChunkReadTimeout(self.client_timeout):
_junk_hdrs, _junk_body = next(mime_documents_iter)
drain(_junk_body, self.network_chunk_size,
self.client_timeout)
except ChunkReadTimeout:
raise HTTPClientDisconnect()
except StopIteration:
pass
except (DiskFileXattrNotSupported, DiskFileNoSpace):
return HTTPInsufficientStorage(drive=device, request=request)
if orig_delete_at != new_delete_at:
if new_delete_at:
self.delete_at_update(
'PUT', new_delete_at, account, container, obj, request,
device, policy_idx)
device, policy)
if orig_delete_at:
self.delete_at_update(
'DELETE', orig_delete_at, account, container, obj,
request, device, policy_idx)
request, device, policy)
update_headers = HeaderKeyDict({
'x-size': metadata['Content-Length'],
'x-content-type': metadata['Content-Type'],
'x-timestamp': metadata['X-Timestamp'],
'x-etag': metadata['ETag']})
# apply any container update header overrides sent with request
for key, val in request.headers.iteritems():
override_prefix = 'x-backend-container-update-override-'
if key.lower().startswith(override_prefix):
override = key.lower().replace(override_prefix, 'x-')
update_headers[override] = val
self._check_container_override(update_headers, request.headers)
self._check_container_override(update_headers, footer_meta)
self.container_update(
'PUT', account, container, obj, request,
update_headers,
device, policy_idx)
device, policy)
return HTTPCreated(request=request, etag=etag)
@public
@timing_stats()
def GET(self, request):
"""Handle HTTP GET requests for the Swift Object Server."""
device, partition, account, container, obj, policy_idx = \
device, partition, account, container, obj, policy = \
get_name_and_placement(request, 5, 5, True)
keep_cache = self.keep_cache_private or (
'X-Auth-Token' not in request.headers and
@ -522,7 +687,7 @@ class ObjectController(BaseStorageServer):
try:
disk_file = self.get_diskfile(
device, partition, account, container, obj,
policy_idx=policy_idx)
policy=policy)
except DiskFileDeviceUnavailable:
return HTTPInsufficientStorage(drive=device, request=request)
try:
@ -533,9 +698,14 @@ class ObjectController(BaseStorageServer):
keep_cache = (self.keep_cache_private or
('X-Auth-Token' not in request.headers and
'X-Storage-Token' not in request.headers))
conditional_etag = None
if 'X-Backend-Etag-Is-At' in request.headers:
conditional_etag = metadata.get(
request.headers['X-Backend-Etag-Is-At'])
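# e.g. an EC-aware proxy may set X-Backend-Etag-Is-At to
# X-Object-Sysmeta-Ec-Etag so that conditional requests are evaluated
# against the client-visible object etag rather than the fragment
# archive's etag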
response = Response(
app_iter=disk_file.reader(keep_cache=keep_cache),
request=request, conditional_response=True)
request=request, conditional_response=True,
conditional_etag=conditional_etag)
response.headers['Content-Type'] = metadata.get(
'Content-Type', 'application/octet-stream')
for key, value in metadata.iteritems():
@ -567,12 +737,12 @@ class ObjectController(BaseStorageServer):
@timing_stats(sample_rate=0.8)
def HEAD(self, request):
"""Handle HTTP HEAD requests for the Swift Object Server."""
device, partition, account, container, obj, policy_idx = \
device, partition, account, container, obj, policy = \
get_name_and_placement(request, 5, 5, True)
try:
disk_file = self.get_diskfile(
device, partition, account, container, obj,
policy_idx=policy_idx)
policy=policy)
except DiskFileDeviceUnavailable:
return HTTPInsufficientStorage(drive=device, request=request)
try:
@ -585,7 +755,12 @@ class ObjectController(BaseStorageServer):
headers['X-Backend-Timestamp'] = e.timestamp.internal
return HTTPNotFound(request=request, headers=headers,
conditional_response=True)
response = Response(request=request, conditional_response=True)
conditional_etag = None
if 'X-Backend-Etag-Is-At' in request.headers:
conditional_etag = metadata.get(
request.headers['X-Backend-Etag-Is-At'])
response = Response(request=request, conditional_response=True,
conditional_etag=conditional_etag)
response.headers['Content-Type'] = metadata.get(
'Content-Type', 'application/octet-stream')
for key, value in metadata.iteritems():
@ -609,13 +784,13 @@ class ObjectController(BaseStorageServer):
@timing_stats()
def DELETE(self, request):
"""Handle HTTP DELETE requests for the Swift Object Server."""
device, partition, account, container, obj, policy_idx = \
device, partition, account, container, obj, policy = \
get_name_and_placement(request, 5, 5, True)
req_timestamp = valid_timestamp(request)
try:
disk_file = self.get_diskfile(
device, partition, account, container, obj,
policy_idx=policy_idx)
policy=policy)
except DiskFileDeviceUnavailable:
return HTTPInsufficientStorage(drive=device, request=request)
try:
@ -667,13 +842,13 @@ class ObjectController(BaseStorageServer):
if orig_delete_at:
self.delete_at_update('DELETE', orig_delete_at, account,
container, obj, request, device,
policy_idx)
policy)
if orig_timestamp < req_timestamp:
disk_file.delete(req_timestamp)
self.container_update(
'DELETE', account, container, obj, request,
HeaderKeyDict({'x-timestamp': req_timestamp.internal}),
device, policy_idx)
device, policy)
return response_class(
request=request,
headers={'X-Backend-Timestamp': response_timestamp.internal})
@ -685,12 +860,17 @@ class ObjectController(BaseStorageServer):
"""
Handle REPLICATE requests for the Swift Object Server. This is used
by the object replicator to get hashes for directories.
Note that the name REPLICATE is preserved for historical reasons as
this verb really just returns the hashes information for the specified
parameters and is used, for example, by both replication and EC.
"""
device, partition, suffix, policy_idx = \
device, partition, suffix_parts, policy = \
get_name_and_placement(request, 2, 3, True)
suffixes = suffix_parts.split('-') if suffix_parts else []
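# e.g. REPLICATE /sda1/0/abc-def yields suffixes ['abc', 'def'], while a
# bare /sda1/0 asks for the hashes of the whole partition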
try:
hashes = self._diskfile_mgr.get_hashes(device, partition, suffix,
policy_idx)
hashes = self._diskfile_router[policy].get_hashes(
device, partition, suffixes, policy)
except DiskFileDeviceUnavailable:
resp = HTTPInsufficientStorage(drive=device, request=request)
else:
@ -700,7 +880,7 @@ class ObjectController(BaseStorageServer):
@public
@replication
@timing_stats(sample_rate=0.1)
def REPLICATION(self, request):
def SSYNC(self, request):
return Response(app_iter=ssync_receiver.Receiver(self, request)())
def __call__(self, env, start_response):
@ -734,7 +914,7 @@ class ObjectController(BaseStorageServer):
trans_time = time.time() - start_time
if self.log_requests:
log_line = get_log_line(req, res, trans_time, '')
if req.method in ('REPLICATE', 'REPLICATION') or \
if req.method in ('REPLICATE', 'SSYNC') or \
'X-Backend-Replication' in req.headers:
self.logger.debug(log_line)
else:


@ -24,27 +24,28 @@ from swift.common import exceptions
from swift.common import http
from swift.common import swob
from swift.common import utils
from swift.common import request_helpers
class Receiver(object):
"""
Handles incoming REPLICATION requests to the object server.
Handles incoming SSYNC requests to the object server.
These requests come from the object-replicator daemon that uses
:py:mod:`.ssync_sender`.
The number of concurrent REPLICATION requests is restricted by
The number of concurrent SSYNC requests is restricted by
use of a replication_semaphore and can be configured with the
object-server.conf [object-server] replication_concurrency
setting.
A REPLICATION request is really just an HTTP conduit for
An SSYNC request is really just an HTTP conduit for
sender/receiver replication communication. The overall
REPLICATION request should always succeed, but it will contain
SSYNC request should always succeed, but it will contain
multiple requests within its request and response bodies. This
"hack" is done so that replication concurrency can be managed.
The general process inside a REPLICATION request is:
The general process inside an SSYNC request is:
1. Initialize the request: Basic request validation, mount check,
acquire semaphore lock, etc.
@ -72,10 +73,10 @@ class Receiver(object):
def __call__(self):
"""
Processes a REPLICATION request.
Processes an SSYNC request.
Acquires a semaphore lock and then proceeds through the steps
of the REPLICATION process.
of the SSYNC process.
"""
# The general theme for functions __call__ calls is that they should
# raise exceptions.MessageTimeout for client timeouts (logged locally),
@ -88,7 +89,7 @@ class Receiver(object):
try:
# Double try blocks in case our main error handlers fail.
try:
# intialize_request is for preamble items that can be done
# initialize_request is for preamble items that can be done
# outside a replication semaphore lock.
for data in self.initialize_request():
yield data
@ -98,7 +99,7 @@ class Receiver(object):
if not self.app.replication_semaphore.acquire(False):
raise swob.HTTPServiceUnavailable()
try:
with self.app._diskfile_mgr.replication_lock(self.device):
with self.diskfile_mgr.replication_lock(self.device):
for data in self.missing_check():
yield data
for data in self.updates():
@ -111,7 +112,7 @@ class Receiver(object):
self.app.replication_semaphore.release()
except exceptions.ReplicationLockTimeout as err:
self.app.logger.debug(
'%s/%s/%s REPLICATION LOCK TIMEOUT: %s' % (
'%s/%s/%s SSYNC LOCK TIMEOUT: %s' % (
self.request.remote_addr, self.device, self.partition,
err))
yield ':ERROR: %d %r\n' % (0, str(err))
@ -166,14 +167,17 @@ class Receiver(object):
"""
# The following is the setting we talk about above in _ensure_flush.
self.request.environ['eventlet.minimum_write_chunk_size'] = 0
self.device, self.partition = utils.split_path(
urllib.unquote(self.request.path), 2, 2, False)
self.policy_idx = \
int(self.request.headers.get('X-Backend-Storage-Policy-Index', 0))
self.device, self.partition, self.policy = \
request_helpers.get_name_and_placement(self.request, 2, 2, False)
if 'X-Backend-Ssync-Frag-Index' in self.request.headers:
self.frag_index = int(
self.request.headers['X-Backend-Ssync-Frag-Index'])
else:
self.frag_index = None
utils.validate_device_partition(self.device, self.partition)
if self.app._diskfile_mgr.mount_check and \
not constraints.check_mount(
self.app._diskfile_mgr.devices, self.device):
self.diskfile_mgr = self.app._diskfile_router[self.policy]
if self.diskfile_mgr.mount_check and not constraints.check_mount(
self.diskfile_mgr.devices, self.device):
raise swob.HTTPInsufficientStorage(drive=self.device)
self.fp = self.request.environ['wsgi.input']
for data in self._ensure_flush():
@ -182,7 +186,7 @@ class Receiver(object):
def missing_check(self):
"""
Handles the receiver-side of the MISSING_CHECK step of a
REPLICATION request.
SSYNC request.
Receives a list of hashes and timestamps of object
information the sender can provide and responds with a list
@ -226,11 +230,13 @@ class Receiver(object):
line = self.fp.readline(self.app.network_chunk_size)
if not line or line.strip() == ':MISSING_CHECK: END':
break
object_hash, timestamp = [urllib.unquote(v) for v in line.split()]
parts = line.split()
object_hash, timestamp = [urllib.unquote(v) for v in parts[:2]]
want = False
try:
df = self.app._diskfile_mgr.get_diskfile_from_hash(
self.device, self.partition, object_hash, self.policy_idx)
df = self.diskfile_mgr.get_diskfile_from_hash(
self.device, self.partition, object_hash, self.policy,
frag_index=self.frag_index)
except exceptions.DiskFileNotExist:
want = True
else:
@ -253,7 +259,7 @@ class Receiver(object):
def updates(self):
"""
Handles the UPDATES step of a REPLICATION request.
Handles the UPDATES step of an SSYNC request.
Receives a set of PUT and DELETE subrequests that will be
routed to the object server itself for processing. These
@ -353,7 +359,7 @@ class Receiver(object):
subreq_iter())
else:
raise Exception('Invalid subrequest method %s' % method)
subreq.headers['X-Backend-Storage-Policy-Index'] = self.policy_idx
subreq.headers['X-Backend-Storage-Policy-Index'] = int(self.policy)
subreq.headers['X-Backend-Replication'] = 'True'
if replication_headers:
subreq.headers['X-Backend-Replication-Headers'] = \


@ -22,7 +22,7 @@ from swift.common import http
class Sender(object):
"""
Sends REPLICATION requests to the object server.
Sends SSYNC requests to the object server.
These requests are eventually handled by
:py:mod:`.ssync_receiver` and full documentation about the
@ -31,6 +31,7 @@ class Sender(object):
def __init__(self, daemon, node, job, suffixes, remote_check_objs=None):
self.daemon = daemon
self.df_mgr = self.daemon._diskfile_mgr
self.node = node
self.job = job
self.suffixes = suffixes
@ -38,28 +39,28 @@ class Sender(object):
self.response = None
self.response_buffer = ''
self.response_chunk_left = 0
self.available_set = set()
# available_map has an entry for each object in the given suffixes that
# is available to be sync'd; each entry is a hash => timestamp
self.available_map = {}
# When remote_check_objs is given in the job, ssync_sender tries only
# to check whether those objects exist on the remote node.
self.remote_check_objs = remote_check_objs
# send_list has an entry for each object that the receiver wants to
# be sync'ed; each entry is an object hash
self.send_list = []
self.failures = 0
@property
def policy_idx(self):
return int(self.job.get('policy_idx', 0))
def __call__(self):
"""
Perform ssync with remote node.
:returns: a 2-tuple, in the form (success, can_delete_objs).
Success is a boolean, and can_delete_objs is an iterable of strings
representing the hashes which are in sync with the remote node.
:returns: a 2-tuple, in the form (success, can_delete_objs) where
success is a boolean and can_delete_objs is the map of
objects that are in sync with the receiver. Each entry in
can_delete_objs maps a hash => timestamp
"""
if not self.suffixes:
return True, set()
return True, {}
try:
# Double try blocks in case our main error handler fails.
try:
@ -72,18 +73,20 @@ class Sender(object):
self.missing_check()
if self.remote_check_objs is None:
self.updates()
can_delete_obj = self.available_set
can_delete_obj = self.available_map
else:
# when we are initialized with remote_check_objs we don't
# *send* any requested updates; instead we only collect
# what's already in sync and safe for deletion
can_delete_obj = self.available_set.difference(
self.send_list)
in_sync_hashes = (set(self.available_map.keys()) -
set(self.send_list))
can_delete_obj = dict((hash_, self.available_map[hash_])
for hash_ in in_sync_hashes)
self.disconnect()
if not self.failures:
return True, can_delete_obj
else:
return False, set()
return False, {}
except (exceptions.MessageTimeout,
exceptions.ReplicationException) as err:
self.daemon.logger.error(
@ -109,11 +112,11 @@ class Sender(object):
# would only get called if the above except Exception handler
# failed (bad node or job data).
self.daemon.logger.exception('EXCEPTION in replication.Sender')
return False, set()
return False, {}
def connect(self):
"""
Establishes a connection and starts a REPLICATION request
Establishes a connection and starts an SSYNC request
with the object server.
"""
with exceptions.MessageTimeout(
@ -121,11 +124,13 @@ class Sender(object):
self.connection = bufferedhttp.BufferedHTTPConnection(
'%s:%s' % (self.node['replication_ip'],
self.node['replication_port']))
self.connection.putrequest('REPLICATION', '/%s/%s' % (
self.connection.putrequest('SSYNC', '/%s/%s' % (
self.node['device'], self.job['partition']))
self.connection.putheader('Transfer-Encoding', 'chunked')
self.connection.putheader('X-Backend-Storage-Policy-Index',
self.policy_idx)
int(self.job['policy']))
self.connection.putheader('X-Backend-Ssync-Frag-Index',
self.node['index'])
self.connection.endheaders()
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'connect receive'):
@ -137,7 +142,7 @@ class Sender(object):
def readline(self):
"""
Reads a line from the REPLICATION response body.
Reads a line from the SSYNC response body.
httplib has no readline and will block on read(x) until x is
read, so we have to do the work ourselves. A bit of this is
@ -183,7 +188,7 @@ class Sender(object):
def missing_check(self):
"""
Handles the sender-side of the MISSING_CHECK step of a
REPLICATION request.
SSYNC request.
Full documentation of this can be found at
:py:meth:`.Receiver.missing_check`.
@ -193,14 +198,15 @@ class Sender(object):
self.daemon.node_timeout, 'missing_check start'):
msg = ':MISSING_CHECK: START\r\n'
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
hash_gen = self.daemon._diskfile_mgr.yield_hashes(
hash_gen = self.df_mgr.yield_hashes(
self.job['device'], self.job['partition'],
self.policy_idx, self.suffixes)
self.job['policy'], self.suffixes,
frag_index=self.job.get('frag_index'))
if self.remote_check_objs is not None:
hash_gen = ifilter(lambda (path, object_hash, timestamp):
object_hash in self.remote_check_objs, hash_gen)
for path, object_hash, timestamp in hash_gen:
self.available_set.add(object_hash)
self.available_map[object_hash] = timestamp
with exceptions.MessageTimeout(
self.daemon.node_timeout,
'missing_check send line'):
@ -234,12 +240,13 @@ class Sender(object):
line = line.strip()
if line == ':MISSING_CHECK: END':
break
if line:
self.send_list.append(line)
parts = line.split()
if parts:
self.send_list.append(parts[0])
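A hedged sketch of the exchange (hash and timestamp values hypothetical):
the sender streams one "<hash> <timestamp>" line per candidate object
between the START/END markers and the receiver replies with the hashes it
still needs; each response line starts with a wanted hash (any extra
tokens are ignored here), which is what ends up in send_list:
#   -> :MISSING_CHECK: START
#   -> 9d41d8cd98f00b204e98009812345678 1425348440.71899
#   -> :MISSING_CHECK: END
#   <- :MISSING_CHECK: START
#   <- 9d41d8cd98f00b204e98009812345678
#   <- :MISSING_CHECK: END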
def updates(self):
"""
Handles the sender-side of the UPDATES step of a REPLICATION
Handles the sender-side of the UPDATES step of an SSYNC
request.
Full documentation of this can be found at
@ -252,15 +259,19 @@ class Sender(object):
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
for object_hash in self.send_list:
try:
df = self.daemon._diskfile_mgr.get_diskfile_from_hash(
df = self.df_mgr.get_diskfile_from_hash(
self.job['device'], self.job['partition'], object_hash,
self.policy_idx)
self.job['policy'], frag_index=self.job.get('frag_index'))
except exceptions.DiskFileNotExist:
continue
url_path = urllib.quote(
'/%s/%s/%s' % (df.account, df.container, df.obj))
try:
df.open()
# EC reconstructor may have passed a callback to build
# an alternative diskfile...
df = self.job.get('sync_diskfile_builder', lambda *args: df)(
self.job, self.node, df.get_metadata())
except exceptions.DiskFileDeleted as err:
self.send_delete(url_path, err.timestamp)
except exceptions.DiskFileError:
@ -328,7 +339,7 @@ class Sender(object):
def disconnect(self):
"""
Closes down the connection to the object server once done
with the REPLICATION request.
with the SSYNC request.
"""
try:
with exceptions.MessageTimeout(


@ -29,7 +29,8 @@ from swift.common.ring import Ring
from swift.common.utils import get_logger, renamer, write_pickle, \
dump_recon_cache, config_true_value, ismount
from swift.common.daemon import Daemon
from swift.obj.diskfile import get_tmp_dir, get_async_dir, ASYNCDIR_BASE
from swift.common.storage_policy import split_policy_string, PolicyError
from swift.obj.diskfile import get_tmp_dir, ASYNCDIR_BASE
from swift.common.http import is_success, HTTP_NOT_FOUND, \
HTTP_INTERNAL_SERVER_ERROR
@ -148,28 +149,19 @@ class ObjectUpdater(Daemon):
start_time = time.time()
# loop through async pending dirs for all policies
for asyncdir in self._listdir(device):
# skip stuff like "accounts", "containers", etc.
if not (asyncdir == ASYNCDIR_BASE or
asyncdir.startswith(ASYNCDIR_BASE + '-')):
continue
# we only care about directories
async_pending = os.path.join(device, asyncdir)
if not os.path.isdir(async_pending):
continue
if asyncdir == ASYNCDIR_BASE:
policy_idx = 0
else:
_junk, policy_idx = asyncdir.split('-', 1)
try:
policy_idx = int(policy_idx)
get_async_dir(policy_idx)
except ValueError:
self.logger.warn(_('Directory %s does not map to a '
'valid policy') % asyncdir)
continue
if not asyncdir.startswith(ASYNCDIR_BASE):
# skip stuff like "accounts", "containers", etc.
continue
try:
base, policy = split_policy_string(asyncdir)
except PolicyError as e:
self.logger.warn(_('Directory %r does not map '
'to a valid policy (%s)') % (asyncdir, e))
continue
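# e.g. (illustrative) 'async_pending' maps to policy index 0 and
# 'async_pending-1' to the policy with index 1, while something like
# 'async_pending-bogus' raises PolicyError and is skipped here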
for prefix in self._listdir(async_pending):
prefix_path = os.path.join(async_pending, prefix)
if not os.path.isdir(prefix_path):
@ -193,7 +185,7 @@ class ObjectUpdater(Daemon):
os.unlink(update_path)
else:
self.process_object_update(update_path, device,
policy_idx)
policy)
last_obj_hash = obj_hash
time.sleep(self.slowdown)
try:
@ -202,13 +194,13 @@ class ObjectUpdater(Daemon):
pass
self.logger.timing_since('timing', start_time)
def process_object_update(self, update_path, device, policy_idx):
def process_object_update(self, update_path, device, policy):
"""
Process the object information to be updated and update.
:param update_path: path to pickled object update file
:param device: path to device
:param policy_idx: storage policy index of object update
:param policy: storage policy of object update
"""
try:
update = pickle.load(open(update_path, 'rb'))
@ -228,7 +220,7 @@ class ObjectUpdater(Daemon):
headers_out = update['headers'].copy()
headers_out['user-agent'] = 'object-updater %s' % os.getpid()
headers_out.setdefault('X-Backend-Storage-Policy-Index',
str(policy_idx))
str(int(policy)))
events = [spawn(self.object_update,
node, part, update['op'], obj, headers_out)
for node in nodes if node['id'] not in successes]
@ -256,7 +248,7 @@ class ObjectUpdater(Daemon):
if new_successes:
update['successes'] = successes
write_pickle(update, update_path, os.path.join(
device, get_tmp_dir(policy_idx)))
device, get_tmp_dir(policy)))
def object_update(self, node, part, op, obj, headers_out):
"""


@ -13,7 +13,7 @@
from swift.proxy.controllers.base import Controller
from swift.proxy.controllers.info import InfoController
from swift.proxy.controllers.obj import ObjectController
from swift.proxy.controllers.obj import ObjectControllerRouter
from swift.proxy.controllers.account import AccountController
from swift.proxy.controllers.container import ContainerController
@ -22,5 +22,5 @@ __all__ = [
'ContainerController',
'Controller',
'InfoController',
'ObjectController',
'ObjectControllerRouter',
]


@ -58,9 +58,10 @@ class AccountController(Controller):
constraints.MAX_ACCOUNT_NAME_LENGTH)
return resp
partition, nodes = self.app.account_ring.get_nodes(self.account_name)
partition = self.app.account_ring.get_part(self.account_name)
node_iter = self.app.iter_nodes(self.app.account_ring, partition)
resp = self.GETorHEAD_base(
req, _('Account'), self.app.account_ring, partition,
req, _('Account'), node_iter, partition,
req.swift_entity_path.rstrip('/'))
if resp.status_int == HTTP_NOT_FOUND:
if resp.headers.get('X-Account-Status', '').lower() == 'deleted':


@ -28,6 +28,7 @@ import os
import time
import functools
import inspect
import logging
import operator
from sys import exc_info
from swift import gettext_ as _
@ -39,16 +40,16 @@ from eventlet.timeout import Timeout
from swift.common.wsgi import make_pre_authed_env
from swift.common.utils import Timestamp, config_true_value, \
public, split_path, list_from_csv, GreenthreadSafeIterator, \
quorum_size, GreenAsyncPile
GreenAsyncPile, quorum_size, parse_content_range
from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
ConnectionTimeout
from swift.common.http import is_informational, is_success, is_redirection, \
is_server_error, HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_MULTIPLE_CHOICES, \
HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVICE_UNAVAILABLE, \
HTTP_INSUFFICIENT_STORAGE, HTTP_UNAUTHORIZED
HTTP_INSUFFICIENT_STORAGE, HTTP_UNAUTHORIZED, HTTP_CONTINUE
from swift.common.swob import Request, Response, HeaderKeyDict, Range, \
HTTPException, HTTPRequestedRangeNotSatisfiable
HTTPException, HTTPRequestedRangeNotSatisfiable, HTTPServiceUnavailable
from swift.common.request_helpers import strip_sys_meta_prefix, \
strip_user_meta_prefix, is_user_meta, is_sys_meta, is_sys_or_user_meta
from swift.common.storage_policy import POLICIES
@ -593,16 +594,37 @@ def close_swift_conn(src):
pass
def bytes_to_skip(record_size, range_start):
"""
Assume an object is composed of N records, where the first N-1 are all
the same size and the last is at most that large, but may be smaller.
When a range request is made, it might start with a partial record. This
must be discarded, lest the consumer get bad data. This is particularly
true of suffix-byte-range requests, e.g. "Range: bytes=-12345" where the
size of the object is unknown at the time the request is made.
This function computes the number of bytes that must be discarded to
ensure only whole records are yielded. Erasure-code decoding needs this.
This function could have been inlined, but it took enough tries to get
right that some targeted unit tests were desirable, hence its extraction.
"""
return (record_size - (range_start % record_size)) % record_size
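A worked example with a hypothetical 65536-byte record size: a range
starting at byte 100000 begins 34464 bytes into a record, so 31072 bytes
must be discarded before whole records line up again.
assert bytes_to_skip(65536, 100000) == 31072
assert bytes_to_skip(65536, 131072) == 0  # already on a record boundary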
class GetOrHeadHandler(object):
def __init__(self, app, req, server_type, ring, partition, path,
backend_headers):
def __init__(self, app, req, server_type, node_iter, partition, path,
backend_headers, client_chunk_size=None):
self.app = app
self.ring = ring
self.node_iter = node_iter
self.server_type = server_type
self.partition = partition
self.path = path
self.backend_headers = backend_headers
self.client_chunk_size = client_chunk_size
self.skip_bytes = 0
self.used_nodes = []
self.used_source_etag = ''
@ -649,6 +671,35 @@ class GetOrHeadHandler(object):
else:
self.backend_headers['Range'] = 'bytes=%d-' % num_bytes
def learn_size_from_content_range(self, start, end):
"""
If client_chunk_size is set, makes sure we yield things starting on
chunk boundaries based on the Content-Range header in the response.
Sets our first Range header to the value learned from the
Content-Range header in the response; if we were given a
fully-specified range (e.g. "bytes=123-456"), this is a no-op.
If we were given a half-specified range (e.g. "bytes=123-" or
"bytes=-456"), then this changes the Range header to a
semantically-equivalent one *and* it lets us resume on a proper
boundary instead of just in the middle of a piece somewhere.
If the original request is for more than one range, this does not
affect our backend Range header, since we don't support resuming one
of those anyway.
"""
if self.client_chunk_size:
self.skip_bytes = bytes_to_skip(self.client_chunk_size, start)
if 'Range' in self.backend_headers:
req_range = Range(self.backend_headers['Range'])
if len(req_range.ranges) > 1:
return
self.backend_headers['Range'] = "bytes=%d-%d" % (start, end)
def is_good_source(self, src):
"""
Indicates whether or not the request made to the backend found
@ -674,42 +725,74 @@ class GetOrHeadHandler(object):
"""
try:
nchunks = 0
bytes_read_from_source = 0
client_chunk_size = self.client_chunk_size
bytes_consumed_from_backend = 0
node_timeout = self.app.node_timeout
if self.server_type == 'Object':
node_timeout = self.app.recoverable_node_timeout
buf = ''
while True:
try:
with ChunkReadTimeout(node_timeout):
chunk = source.read(self.app.object_chunk_size)
nchunks += 1
bytes_read_from_source += len(chunk)
buf += chunk
except ChunkReadTimeout:
exc_type, exc_value, exc_traceback = exc_info()
if self.newest or self.server_type != 'Object':
raise exc_type, exc_value, exc_traceback
try:
self.fast_forward(bytes_read_from_source)
self.fast_forward(bytes_consumed_from_backend)
except (NotImplementedError, HTTPException, ValueError):
raise exc_type, exc_value, exc_traceback
buf = ''
new_source, new_node = self._get_source_and_node()
if new_source:
self.app.exception_occurred(
node, _('Object'),
_('Trying to read during GET (retrying)'))
_('Trying to read during GET (retrying)'),
level=logging.ERROR, exc_info=(
exc_type, exc_value, exc_traceback))
# Close-out the connection as best as possible.
if getattr(source, 'swift_conn', None):
close_swift_conn(source)
source = new_source
node = new_node
bytes_read_from_source = 0
continue
else:
raise exc_type, exc_value, exc_traceback
if buf and self.skip_bytes:
if self.skip_bytes < len(buf):
buf = buf[self.skip_bytes:]
bytes_consumed_from_backend += self.skip_bytes
self.skip_bytes = 0
else:
self.skip_bytes -= len(buf)
bytes_consumed_from_backend += len(buf)
buf = ''
if not chunk:
if buf:
with ChunkWriteTimeout(self.app.client_timeout):
bytes_consumed_from_backend += len(buf)
yield buf
buf = ''
break
with ChunkWriteTimeout(self.app.client_timeout):
yield chunk
if client_chunk_size is not None:
while len(buf) >= client_chunk_size:
client_chunk = buf[:client_chunk_size]
buf = buf[client_chunk_size:]
with ChunkWriteTimeout(self.app.client_timeout):
yield client_chunk
bytes_consumed_from_backend += len(client_chunk)
else:
with ChunkWriteTimeout(self.app.client_timeout):
yield buf
bytes_consumed_from_backend += len(buf)
buf = ''
# This is for fairness; if the network is outpacing the CPU,
# we'll always be able to read and write data without
# encountering an EWOULDBLOCK, and so eventlet will not switch
@ -757,7 +840,7 @@ class GetOrHeadHandler(object):
node_timeout = self.app.node_timeout
if self.server_type == 'Object' and not self.newest:
node_timeout = self.app.recoverable_node_timeout
for node in self.app.iter_nodes(self.ring, self.partition):
for node in self.node_iter:
if node in self.used_nodes:
continue
start_node_timing = time.time()
@ -793,8 +876,10 @@ class GetOrHeadHandler(object):
src_headers = dict(
(k.lower(), v) for k, v in
possible_source.getheaders())
if src_headers.get('etag', '').strip('"') != \
self.used_source_etag:
if self.used_source_etag != src_headers.get(
'x-object-sysmeta-ec-etag',
src_headers.get('etag', '')).strip('"'):
self.statuses.append(HTTP_NOT_FOUND)
self.reasons.append('')
self.bodies.append('')
@ -832,7 +917,9 @@ class GetOrHeadHandler(object):
src_headers = dict(
(k.lower(), v) for k, v in
possible_source.getheaders())
self.used_source_etag = src_headers.get('etag', '').strip('"')
self.used_source_etag = src_headers.get(
'x-object-sysmeta-ec-etag',
src_headers.get('etag', '')).strip('"')
return source, node
return None, None
@ -841,13 +928,17 @@ class GetOrHeadHandler(object):
res = None
if source:
res = Response(request=req)
res.status = source.status
update_headers(res, source.getheaders())
if req.method == 'GET' and \
source.status in (HTTP_OK, HTTP_PARTIAL_CONTENT):
cr = res.headers.get('Content-Range')
if cr:
start, end, total = parse_content_range(cr)
self.learn_size_from_content_range(start, end)
res.app_iter = self._make_app_iter(req, node, source)
# See NOTE: swift_conn at top of file about this.
res.swift_conn = source.swift_conn
res.status = source.status
update_headers(res, source.getheaders())
if not res.environ:
res.environ = {}
res.environ['swift_x_timestamp'] = \
@ -993,7 +1084,8 @@ class Controller(object):
else:
info['partition'] = part
info['nodes'] = nodes
info.setdefault('storage_policy', '0')
if info.get('storage_policy') is None:
info['storage_policy'] = 0
return info
def _make_request(self, nodes, part, method, path, headers, query,
@ -1098,6 +1190,13 @@ class Controller(object):
'%s %s' % (self.server_type, req.method),
overrides=overrides, headers=resp_headers)
def _quorum_size(self, n):
"""
Number of successful backend responses needed for the proxy to
consider the client request successful.
"""
return quorum_size(n)
def have_quorum(self, statuses, node_count):
"""
Given a list of statuses from several requests, determine if
@ -1107,16 +1206,18 @@ class Controller(object):
:param node_count: number of nodes being queried (basically ring count)
:returns: True or False, depending on if quorum is established
"""
quorum = quorum_size(node_count)
quorum = self._quorum_size(node_count)
if len(statuses) >= quorum:
for hundred in (HTTP_OK, HTTP_MULTIPLE_CHOICES, HTTP_BAD_REQUEST):
for hundred in (HTTP_CONTINUE, HTTP_OK, HTTP_MULTIPLE_CHOICES,
HTTP_BAD_REQUEST):
if sum(1 for s in statuses
if hundred <= s < hundred + 100) >= quorum:
return True
return False
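As a quick illustration of the grouping-by-hundreds check above (a sketch; the quorum here is assumed to be a simple majority, n // 2 + 1, as used for replicated policies):

    # three nodes -> quorum of 2; statuses are grouped by hundred
    quorum = 3 // 2 + 1                                            # 2
    statuses = [200, 200, 404]
    assert sum(1 for s in statuses if 200 <= s < 300) >= quorum    # 2xx quorum
    statuses = [200, 404, 404]
    assert sum(1 for s in statuses if 400 <= s < 500) >= quorum    # 4xx quorum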
def best_response(self, req, statuses, reasons, bodies, server_type,
etag=None, headers=None, overrides=None):
etag=None, headers=None, overrides=None,
quorum_size=None):
"""
Given a list of responses from several servers, choose the best to
return to the API.
@ -1128,10 +1229,16 @@ class Controller(object):
:param server_type: type of server the responses came from
:param etag: etag
:param headers: headers of each response
:param overrides: overrides to apply when lacking quorum
:param quorum_size: quorum size to use
:returns: swob.Response object with the correct status, body, etc. set
"""
if quorum_size is None:
quorum_size = self._quorum_size(len(statuses))
resp = self._compute_quorum_response(
req, statuses, reasons, bodies, etag, headers)
req, statuses, reasons, bodies, etag, headers,
quorum_size=quorum_size)
if overrides and not resp:
faked_up_status_indices = set()
transformed = []
@ -1145,25 +1252,25 @@ class Controller(object):
statuses, reasons, headers, bodies = zip(*transformed)
resp = self._compute_quorum_response(
req, statuses, reasons, bodies, etag, headers,
indices_to_avoid=faked_up_status_indices)
indices_to_avoid=faked_up_status_indices,
quorum_size=quorum_size)
if not resp:
resp = Response(request=req)
resp = HTTPServiceUnavailable(request=req)
self.app.logger.error(_('%(type)s returning 503 for %(statuses)s'),
{'type': server_type, 'statuses': statuses})
resp.status = '503 Internal Server Error'
return resp
def _compute_quorum_response(self, req, statuses, reasons, bodies, etag,
headers, indices_to_avoid=()):
headers, quorum_size, indices_to_avoid=()):
if not statuses:
return None
for hundred in (HTTP_OK, HTTP_MULTIPLE_CHOICES, HTTP_BAD_REQUEST):
hstatuses = \
[(i, s) for i, s in enumerate(statuses)
if hundred <= s < hundred + 100]
if len(hstatuses) >= quorum_size(len(statuses)):
if len(hstatuses) >= quorum_size:
resp = Response(request=req)
try:
status_index, status = max(
@ -1228,22 +1335,25 @@ class Controller(object):
else:
self.app.logger.warning('Could not autocreate account %r' % path)
def GETorHEAD_base(self, req, server_type, ring, partition, path):
def GETorHEAD_base(self, req, server_type, node_iter, partition, path,
client_chunk_size=None):
"""
Base handler for HTTP GET or HEAD requests.
:param req: swob.Request object
:param server_type: server type used in logging
:param ring: the ring to obtain nodes from
:param node_iter: an iterator to obtain nodes from
:param partition: partition
:param path: path for the request
:param client_chunk_size: chunk size for response body iterator
:returns: swob.Response object
"""
backend_headers = self.generate_request_headers(
req, additional=req.headers)
handler = GetOrHeadHandler(self.app, req, self.server_type, ring,
partition, path, backend_headers)
handler = GetOrHeadHandler(self.app, req, self.server_type, node_iter,
partition, path, backend_headers,
client_chunk_size=client_chunk_size)
res = handler.get_working_response(req)
if not res:

View File

@ -93,8 +93,9 @@ class ContainerController(Controller):
return HTTPNotFound(request=req)
part = self.app.container_ring.get_part(
self.account_name, self.container_name)
node_iter = self.app.iter_nodes(self.app.container_ring, part)
resp = self.GETorHEAD_base(
req, _('Container'), self.app.container_ring, part,
req, _('Container'), node_iter, part,
req.swift_entity_path)
if 'swift.authorize' in req.environ:
req.acl = resp.headers.get('x-container-read')

File diff suppressed because it is too large

View File

@ -20,6 +20,8 @@ from swift import gettext_ as _
from random import shuffle
from time import time
import itertools
import functools
import sys
from eventlet import Timeout
@ -31,12 +33,14 @@ from swift.common.utils import cache_from_env, get_logger, \
get_remote_client, split_path, config_true_value, generate_trans_id, \
affinity_key_function, affinity_locality_predicate, list_from_csv, \
register_swift_info
from swift.common.constraints import check_utf8
from swift.proxy.controllers import AccountController, ObjectController, \
ContainerController, InfoController
from swift.common.constraints import check_utf8, valid_api_version
from swift.proxy.controllers import AccountController, ContainerController, \
ObjectControllerRouter, InfoController
from swift.proxy.controllers.base import get_container_info
from swift.common.swob import HTTPBadRequest, HTTPForbidden, \
HTTPMethodNotAllowed, HTTPNotFound, HTTPPreconditionFailed, \
HTTPServerError, HTTPException, Request
HTTPServerError, HTTPException, Request, HTTPServiceUnavailable
from swift.common.exceptions import APIVersionError
# List of entry points for mandatory middlewares.
@ -109,6 +113,7 @@ class Application(object):
# ensure rings are loaded for all configured storage policies
for policy in POLICIES:
policy.load_ring(swift_dir)
self.obj_controller_router = ObjectControllerRouter()
self.memcache = memcache
mimetypes.init(mimetypes.knownfiles +
[os.path.join(swift_dir, 'mime.types')])
@ -206,7 +211,7 @@ class Application(object):
self.expose_info = config_true_value(
conf.get('expose_info', 'yes'))
self.disallowed_sections = list_from_csv(
conf.get('disallowed_sections'))
conf.get('disallowed_sections', 'swift.valid_api_versions'))
self.admin_key = conf.get('admin_key', None)
register_swift_info(
version=swift_version,
@ -235,29 +240,46 @@ class Application(object):
"""
return POLICIES.get_object_ring(policy_idx, self.swift_dir)
def get_controller(self, path):
def get_controller(self, req):
"""
Get the controller to handle a request.
:param path: path from request
:param req: the request
:returns: tuple of (controller class, path dictionary)
:raises: ValueError (thrown by split_path) if given invalid path
"""
if path == '/info':
if req.path == '/info':
d = dict(version=None,
expose_info=self.expose_info,
disallowed_sections=self.disallowed_sections,
admin_key=self.admin_key)
return InfoController, d
version, account, container, obj = split_path(path, 1, 4, True)
version, account, container, obj = split_path(req.path, 1, 4, True)
d = dict(version=version,
account_name=account,
container_name=container,
object_name=obj)
if account and not valid_api_version(version):
raise APIVersionError('Invalid path')
if obj and container and account:
return ObjectController, d
info = get_container_info(req.environ, self)
policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
info['storage_policy'])
policy = POLICIES.get_by_index(policy_index)
if not policy:
# This indicates that a new policy has been created,
# with rings, deployed, released (i.e. deprecated =
# False), used by a client to create a container via
# another proxy that was restarted after the policy
# was released, and is now cached - all before this
# worker was HUPed to stop accepting new
# connections. There should never be an "unknown"
# index - but when there is - it's probably operator
# error and hopefully temporary.
raise HTTPServiceUnavailable('Unknown Storage Policy')
return self.obj_controller_router[policy], d
elif container and account:
return ContainerController, d
elif account and not container and not obj:
@ -317,10 +339,13 @@ class Application(object):
request=req, body='Invalid UTF8 or contains NULL')
try:
controller, path_parts = self.get_controller(req.path)
controller, path_parts = self.get_controller(req)
p = req.path_info
if isinstance(p, unicode):
p = p.encode('utf-8')
except APIVersionError:
self.logger.increment('errors')
return HTTPBadRequest(request=req)
except ValueError:
self.logger.increment('errors')
return HTTPNotFound(request=req)
@ -474,9 +499,9 @@ class Application(object):
def iter_nodes(self, ring, partition, node_iter=None):
"""
Yields nodes for a ring partition, skipping over error
limited nodes and stopping at the configurable number of
nodes. If a node yielded subsequently gets error limited, an
extra node will be yielded to take its place.
limited nodes and stopping at the configurable number of nodes. If a
node yielded subsequently gets error limited, an extra node will be
yielded to take its place.
Note that if you're going to iterate over this concurrently from
multiple greenthreads, you'll want to use a
@ -527,7 +552,8 @@ class Application(object):
if nodes_left <= 0:
return
def exception_occurred(self, node, typ, additional_info):
def exception_occurred(self, node, typ, additional_info,
**kwargs):
"""
Handle logging of generic exceptions.
@ -536,11 +562,18 @@ class Application(object):
:param additional_info: additional information to log
"""
self._incr_node_errors(node)
self.logger.exception(
_('ERROR with %(type)s server %(ip)s:%(port)s/%(device)s re: '
'%(info)s'),
{'type': typ, 'ip': node['ip'], 'port': node['port'],
'device': node['device'], 'info': additional_info})
if 'level' in kwargs:
log = functools.partial(self.logger.log, kwargs.pop('level'))
if 'exc_info' not in kwargs:
kwargs['exc_info'] = sys.exc_info()
else:
log = self.logger.exception
log(_('ERROR with %(type)s server %(ip)s:%(port)s/%(device)s'
' re: %(info)s'), {
'type': typ, 'ip': node['ip'], 'port':
node['port'], 'device': node['device'],
'info': additional_info
}, **kwargs)
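The level handling above is plain functools.partial plus sys.exc_info(); a standalone sketch of the same pattern (logger name and messages are illustrative, not from the diff):

    import functools
    import logging
    import sys

    logging.basicConfig()
    logger = logging.getLogger('proxy-server')   # hypothetical logger name

    def report(additional_info, **kwargs):
        # bind an explicit level if one was given, otherwise fall back
        # to logger.exception (which always logs at ERROR with traceback)
        if 'level' in kwargs:
            log = functools.partial(logger.log, kwargs.pop('level'))
            if 'exc_info' not in kwargs:
                kwargs['exc_info'] = sys.exc_info()
        else:
            log = logger.exception
        log('ERROR with Object server re: %s', additional_info, **kwargs)

    try:
        raise IOError('backend went away')
    except IOError:
        # logs at ERROR and attaches the current traceback
        report('Trying to read during GET (retrying)', level=logging.ERROR)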
def modify_wsgi_pipeline(self, pipe):
"""

View File

@ -23,6 +23,7 @@ import eventlet
import eventlet.debug
import functools
import random
from ConfigParser import ConfigParser, NoSectionError
from time import time, sleep
from httplib import HTTPException
from urlparse import urlparse
@ -32,6 +33,7 @@ from gzip import GzipFile
from shutil import rmtree
from tempfile import mkdtemp
from swift.common.middleware.memcache import MemcacheMiddleware
from swift.common.storage_policy import parse_storage_policies, PolicyError
from test import get_config
from test.functional.swift_test_client import Account, Connection, \
@ -50,6 +52,9 @@ from swift.container import server as container_server
from swift.obj import server as object_server, mem_server as mem_object_server
import swift.proxy.controllers.obj
DEBUG = True
# In order to get the proper blocking behavior of sockets without using
# threads, where we can set an arbitrary timeout for some piece of code under
# test, we use eventlet with the standard socket library patched. We have to
@ -99,7 +104,7 @@ orig_hash_path_suff_pref = ('', '')
orig_swift_conf_name = None
in_process = False
_testdir = _test_servers = _test_sockets = _test_coros = None
_testdir = _test_servers = _test_coros = None
class FakeMemcacheMiddleware(MemcacheMiddleware):
@ -113,29 +118,187 @@ class FakeMemcacheMiddleware(MemcacheMiddleware):
self.memcache = FakeMemcache()
# swift.conf contents for in-process functional test runs
functests_swift_conf = '''
[swift-hash]
swift_hash_path_suffix = inprocfunctests
swift_hash_path_prefix = inprocfunctests
class InProcessException(BaseException):
pass
[swift-constraints]
max_file_size = %d
''' % ((8 * 1024 * 1024) + 2) # 8 MB + 2
def _info(msg):
print >> sys.stderr, msg
def _debug(msg):
if DEBUG:
_info('DEBUG: ' + msg)
def _in_process_setup_swift_conf(swift_conf_src, testdir):
# override swift.conf contents for in-process functional test runs
conf = ConfigParser()
conf.read(swift_conf_src)
try:
section = 'swift-hash'
conf.set(section, 'swift_hash_path_suffix', 'inprocfunctests')
conf.set(section, 'swift_hash_path_prefix', 'inprocfunctests')
section = 'swift-constraints'
max_file_size = (8 * 1024 * 1024) + 2 # 8 MB + 2
conf.set(section, 'max_file_size', max_file_size)
except NoSectionError:
msg = 'Conf file %s is missing section %s' % (swift_conf_src, section)
raise InProcessException(msg)
test_conf_file = os.path.join(testdir, 'swift.conf')
with open(test_conf_file, 'w') as fp:
conf.write(fp)
return test_conf_file
def _in_process_find_conf_file(conf_src_dir, conf_file_name, use_sample=True):
"""
Look for a file first in conf_src_dir, if it exists, otherwise optionally
look in the source tree sample 'etc' dir.
:param conf_src_dir: Directory in which to search first for conf file. May
be None
:param conf_file_name: Name of conf file
:param use_sample: If True and the conf_file_name is not found, then return
any sample conf file found in the source tree sample
'etc' dir by appending '-sample' to conf_file_name
:returns: Path to conf file
:raises InProcessException: If no conf file is found
"""
dflt_src_dir = os.path.normpath(os.path.join(os.path.abspath(__file__),
os.pardir, os.pardir, os.pardir,
'etc'))
conf_src_dir = dflt_src_dir if conf_src_dir is None else conf_src_dir
conf_file_path = os.path.join(conf_src_dir, conf_file_name)
if os.path.exists(conf_file_path):
return conf_file_path
if use_sample:
# fall back to using the corresponding sample conf file
conf_file_name += '-sample'
conf_file_path = os.path.join(dflt_src_dir, conf_file_name)
if os.path.exists(conf_file_path):
return conf_file_path
msg = 'Failed to find config file %s' % conf_file_name
raise InProcessException(msg)
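A brief usage sketch (paths hypothetical; behavior as described by the docstring above): in a typical source checkout, with no override directory the lookup falls back to the sample file in the source tree's etc/ dir.

    # conf_src_dir=None -> search the source tree's etc/ dir; if
    # 'proxy-server.conf' is absent, 'proxy-server.conf-sample' is returned
    proxy_conf = _in_process_find_conf_file(None, 'proxy-server.conf')

    # with use_sample=False a missing file raises InProcessException
    try:
        _in_process_find_conf_file('/tmp/my-conf', 'object-1.ring.gz',
                                   use_sample=False)
    except InProcessException:
        pass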
def _in_process_setup_ring(swift_conf, conf_src_dir, testdir):
"""
If SWIFT_TEST_POLICY is set:
- look in swift.conf file for specified policy
- move this to be policy-0, preserving its options
- copy its ring file to test dir, changing its devices to suit
in process testing, and renaming it to suit policy-0
Otherwise, create a default ring file.
"""
conf = ConfigParser()
conf.read(swift_conf)
sp_prefix = 'storage-policy:'
try:
# policy index 0 will be created if no policy exists in conf
policies = parse_storage_policies(conf)
except PolicyError as e:
raise InProcessException(e)
# clear all policies from test swift.conf before adding test policy back
for policy in policies:
conf.remove_section(sp_prefix + str(policy.idx))
policy_specified = os.environ.get('SWIFT_TEST_POLICY')
if policy_specified:
policy_to_test = policies.get_by_name(policy_specified)
if policy_to_test is None:
raise InProcessException('Failed to find policy name "%s"'
% policy_specified)
_info('Using specified policy %s' % policy_to_test.name)
else:
policy_to_test = policies.default
_info('Defaulting to policy %s' % policy_to_test.name)
# make policy_to_test be policy index 0 and default for the test config
sp_zero_section = sp_prefix + '0'
conf.add_section(sp_zero_section)
for (k, v) in policy_to_test.get_info(config=True).items():
conf.set(sp_zero_section, k, v)
conf.set(sp_zero_section, 'default', True)
with open(swift_conf, 'w') as fp:
conf.write(fp)
# look for a source ring file
ring_file_src = ring_file_test = 'object.ring.gz'
if policy_to_test.idx:
ring_file_src = 'object-%s.ring.gz' % policy_to_test.idx
try:
ring_file_src = _in_process_find_conf_file(conf_src_dir, ring_file_src,
use_sample=False)
except InProcessException as e:
if policy_specified:
raise InProcessException('Failed to find ring file %s'
% ring_file_src)
ring_file_src = None
ring_file_test = os.path.join(testdir, ring_file_test)
if ring_file_src:
# copy source ring file to a policy-0 test ring file, re-homing servers
_info('Using source ring file %s' % ring_file_src)
ring_data = ring.RingData.load(ring_file_src)
obj_sockets = []
for dev in ring_data.devs:
device = 'sd%c1' % chr(len(obj_sockets) + ord('a'))
utils.mkdirs(os.path.join(_testdir, 'sda1'))
utils.mkdirs(os.path.join(_testdir, 'sda1', 'tmp'))
obj_socket = eventlet.listen(('localhost', 0))
obj_sockets.append(obj_socket)
dev['port'] = obj_socket.getsockname()[1]
dev['ip'] = '127.0.0.1'
dev['device'] = device
dev['replication_port'] = dev['port']
dev['replication_ip'] = dev['ip']
ring_data.save(ring_file_test)
else:
# make default test ring, 2 replicas, 4 partitions, 2 devices
_info('No source object ring file, creating 2rep/4part/2dev ring')
obj_sockets = [eventlet.listen(('localhost', 0)) for _ in (0, 1)]
ring_data = ring.RingData(
[[0, 1, 0, 1], [1, 0, 1, 0]],
[{'id': 0, 'zone': 0, 'device': 'sda1', 'ip': '127.0.0.1',
'port': obj_sockets[0].getsockname()[1]},
{'id': 1, 'zone': 1, 'device': 'sdb1', 'ip': '127.0.0.1',
'port': obj_sockets[1].getsockname()[1]}],
30)
with closing(GzipFile(ring_file_test, 'wb')) as f:
pickle.dump(ring_data, f)
for dev in ring_data.devs:
_debug('Ring file dev: %s' % dev)
return obj_sockets
def in_process_setup(the_object_server=object_server):
print >>sys.stderr, 'IN-PROCESS SERVERS IN USE FOR FUNCTIONAL TESTS'
print >>sys.stderr, 'Using object_server: %s' % the_object_server.__name__
_dir = os.path.normpath(os.path.join(os.path.abspath(__file__),
os.pardir, os.pardir, os.pardir))
proxy_conf = os.path.join(_dir, 'etc', 'proxy-server.conf-sample')
if os.path.exists(proxy_conf):
print >>sys.stderr, 'Using proxy-server config from %s' % proxy_conf
_info('IN-PROCESS SERVERS IN USE FOR FUNCTIONAL TESTS')
_info('Using object_server class: %s' % the_object_server.__name__)
conf_src_dir = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_DIR')
else:
print >>sys.stderr, 'Failed to find conf file %s' % proxy_conf
return
if conf_src_dir is not None:
if not os.path.isdir(conf_src_dir):
msg = 'Config source %s is not a dir' % conf_src_dir
raise InProcessException(msg)
_info('Using config source dir: %s' % conf_src_dir)
# If SWIFT_TEST_IN_PROCESS_CONF_DIR specifies a config source dir then
# prefer config files from there, otherwise read config from source tree
# sample files. A mixture of files from the two sources is allowed.
proxy_conf = _in_process_find_conf_file(conf_src_dir, 'proxy-server.conf')
_info('Using proxy config from %s' % proxy_conf)
swift_conf_src = _in_process_find_conf_file(conf_src_dir, 'swift.conf')
_info('Using swift config from %s' % swift_conf_src)
monkey_patch_mimetools()
@ -148,9 +311,8 @@ def in_process_setup(the_object_server=object_server):
utils.mkdirs(os.path.join(_testdir, 'sdb1'))
utils.mkdirs(os.path.join(_testdir, 'sdb1', 'tmp'))
swift_conf = os.path.join(_testdir, "swift.conf")
with open(swift_conf, "w") as scfp:
scfp.write(functests_swift_conf)
swift_conf = _in_process_setup_swift_conf(swift_conf_src, _testdir)
obj_sockets = _in_process_setup_ring(swift_conf, conf_src_dir, _testdir)
global orig_swift_conf_name
orig_swift_conf_name = utils.SWIFT_CONF_FILE
@ -221,11 +383,6 @@ def in_process_setup(the_object_server=object_server):
acc2lis = eventlet.listen(('localhost', 0))
con1lis = eventlet.listen(('localhost', 0))
con2lis = eventlet.listen(('localhost', 0))
obj1lis = eventlet.listen(('localhost', 0))
obj2lis = eventlet.listen(('localhost', 0))
global _test_sockets
_test_sockets = \
(prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis)
account_ring_path = os.path.join(_testdir, 'account.ring.gz')
with closing(GzipFile(account_ring_path, 'wb')) as f:
@ -243,14 +400,6 @@ def in_process_setup(the_object_server=object_server):
{'id': 1, 'zone': 1, 'device': 'sdb1', 'ip': '127.0.0.1',
'port': con2lis.getsockname()[1]}], 30),
f)
object_ring_path = os.path.join(_testdir, 'object.ring.gz')
with closing(GzipFile(object_ring_path, 'wb')) as f:
pickle.dump(ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]],
[{'id': 0, 'zone': 0, 'device': 'sda1', 'ip': '127.0.0.1',
'port': obj1lis.getsockname()[1]},
{'id': 1, 'zone': 1, 'device': 'sdb1', 'ip': '127.0.0.1',
'port': obj2lis.getsockname()[1]}], 30),
f)
eventlet.wsgi.HttpProtocol.default_request_version = "HTTP/1.0"
# Turn off logging requests by the underlying WSGI software.
@ -270,10 +419,13 @@ def in_process_setup(the_object_server=object_server):
config, logger=debug_logger('cont1'))
con2srv = container_server.ContainerController(
config, logger=debug_logger('cont2'))
obj1srv = the_object_server.ObjectController(
config, logger=debug_logger('obj1'))
obj2srv = the_object_server.ObjectController(
config, logger=debug_logger('obj2'))
objsrvs = [
(obj_sockets[index],
the_object_server.ObjectController(
config, logger=debug_logger('obj%d' % (index + 1))))
for index in range(len(obj_sockets))
]
logger = debug_logger('proxy')
@ -283,7 +435,10 @@ def in_process_setup(the_object_server=object_server):
with mock.patch('swift.common.utils.get_logger', get_logger):
with mock.patch('swift.common.middleware.memcache.MemcacheMiddleware',
FakeMemcacheMiddleware):
app = loadapp(proxy_conf, global_conf=config)
try:
app = loadapp(proxy_conf, global_conf=config)
except Exception as e:
raise InProcessException(e)
nl = utils.NullLogger()
prospa = eventlet.spawn(eventlet.wsgi.server, prolis, app, nl)
@ -291,11 +446,13 @@ def in_process_setup(the_object_server=object_server):
acc2spa = eventlet.spawn(eventlet.wsgi.server, acc2lis, acc2srv, nl)
con1spa = eventlet.spawn(eventlet.wsgi.server, con1lis, con1srv, nl)
con2spa = eventlet.spawn(eventlet.wsgi.server, con2lis, con2srv, nl)
obj1spa = eventlet.spawn(eventlet.wsgi.server, obj1lis, obj1srv, nl)
obj2spa = eventlet.spawn(eventlet.wsgi.server, obj2lis, obj2srv, nl)
objspa = [eventlet.spawn(eventlet.wsgi.server, objsrv[0], objsrv[1], nl)
for objsrv in objsrvs]
global _test_coros
_test_coros = \
(prospa, acc1spa, acc2spa, con1spa, con2spa, obj1spa, obj2spa)
(prospa, acc1spa, acc2spa, con1spa, con2spa) + tuple(objspa)
# Create accounts "test" and "test2"
def create_account(act):
@ -396,8 +553,13 @@ def setup_package():
if in_process:
in_mem_obj_env = os.environ.get('SWIFT_TEST_IN_MEMORY_OBJ')
in_mem_obj = utils.config_true_value(in_mem_obj_env)
in_process_setup(the_object_server=(
mem_object_server if in_mem_obj else object_server))
try:
in_process_setup(the_object_server=(
mem_object_server if in_mem_obj else object_server))
except InProcessException as exc:
print >> sys.stderr, ('Exception during in-process setup: %s'
% str(exc))
raise
global web_front_end
web_front_end = config.get('web_front_end', 'integral')

View File

@ -1317,7 +1317,12 @@ class TestFile(Base):
self.assertEqual(file_types, file_types_read)
def testRangedGets(self):
file_length = 10000
# We set the file_length to a strange multiple here. This is to check
# that ranges still work in the EC case when the requested range
# spans EC segment boundaries. The 1 MiB base value is chosen because
# that's a common EC segment size. The 1.33 multiple is to ensure we
# aren't aligned on segment boundaries
file_length = int(1048576 * 1.33)
range_size = file_length / 10
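# int(1048576 * 1.33) == 1394606 bytes: one full 1 MiB segment plus a
# 346030-byte remainder, and range_size == 139460, so the ranges
# requested below start and end at unaligned offsets within segments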
file_item = self.env.container.file(Utils.create_name())
data = file_item.write_random(file_length)
@ -2409,6 +2414,14 @@ class TestObjectVersioningEnv(object):
cls.account = Account(cls.conn, tf.config.get('account',
tf.config['username']))
# Second connection for ACL tests
config2 = deepcopy(tf.config)
config2['account'] = tf.config['account2']
config2['username'] = tf.config['username2']
config2['password'] = tf.config['password2']
cls.conn2 = Connection(config2)
cls.conn2.authenticate()
# avoid getting a prefix that stops halfway through an encoded
# character
prefix = Utils.create_name().decode("utf-8")[:10].encode("utf-8")
@ -2462,6 +2475,14 @@ class TestCrossPolicyObjectVersioningEnv(object):
cls.account = Account(cls.conn, tf.config.get('account',
tf.config['username']))
# Second connection for ACL tests
config2 = deepcopy(tf.config)
config2['account'] = tf.config['account2']
config2['username'] = tf.config['username2']
config2['password'] = tf.config['password2']
cls.conn2 = Connection(config2)
cls.conn2.authenticate()
# avoid getting a prefix that stops halfway through an encoded
# character
prefix = Utils.create_name().decode("utf-8")[:10].encode("utf-8")
@ -2496,6 +2517,15 @@ class TestObjectVersioning(Base):
"Expected versioning_enabled to be True/False, got %r" %
(self.env.versioning_enabled,))
def tearDown(self):
super(TestObjectVersioning, self).tearDown()
try:
# delete versions first!
self.env.versions_container.delete_files()
self.env.container.delete_files()
except ResponseError:
pass
def test_overwriting(self):
container = self.env.container
versions_container = self.env.versions_container
@ -2555,6 +2585,33 @@ class TestObjectVersioning(Base):
self.assertEqual(3, versions_container.info()['object_count'])
self.assertEqual("112233", man_file.read())
def test_versioning_check_acl(self):
container = self.env.container
versions_container = self.env.versions_container
versions_container.create(hdrs={'X-Container-Read': '.r:*,.rlistings'})
obj_name = Utils.create_name()
versioned_obj = container.file(obj_name)
versioned_obj.write("aaaaa")
self.assertEqual("aaaaa", versioned_obj.read())
versioned_obj.write("bbbbb")
self.assertEqual("bbbbb", versioned_obj.read())
# Use token from second account and try to delete the object
org_token = self.env.account.conn.storage_token
self.env.account.conn.storage_token = self.env.conn2.storage_token
try:
self.assertRaises(ResponseError, versioned_obj.delete)
finally:
self.env.account.conn.storage_token = org_token
# Verify with token from first account
self.assertEqual("bbbbb", versioned_obj.read())
versioned_obj.delete()
self.assertEqual("aaaaa", versioned_obj.read())
class TestObjectVersioningUTF8(Base2, TestObjectVersioning):
set_up = False
@ -2768,10 +2825,23 @@ class TestContainerTempurlEnv(object):
cls.conn, tf.config.get('account', tf.config['username']))
cls.account.delete_containers()
# creating another account and connection
# for ACL tests
config2 = deepcopy(tf.config)
config2['account'] = tf.config['account2']
config2['username'] = tf.config['username2']
config2['password'] = tf.config['password2']
cls.conn2 = Connection(config2)
cls.conn2.authenticate()
cls.account2 = Account(
cls.conn2, config2.get('account', config2['username']))
cls.account2 = cls.conn2.get_account()
cls.container = cls.account.container(Utils.create_name())
if not cls.container.create({
'x-container-meta-temp-url-key': cls.tempurl_key,
'x-container-meta-temp-url-key-2': cls.tempurl_key2}):
'x-container-meta-temp-url-key-2': cls.tempurl_key2,
'x-container-read': cls.account2.name}):
raise ResponseError(cls.conn.response)
cls.obj = cls.container.file(Utils.create_name())
@ -2914,6 +2984,28 @@ class TestContainerTempurl(Base):
parms=parms)
self.assert_status([401])
def test_tempurl_keys_visible_to_account_owner(self):
if not tf.cluster_info.get('tempauth'):
raise SkipTest('TEMP AUTH SPECIFIC TEST')
metadata = self.env.container.info()
self.assertEqual(metadata.get('tempurl_key'), self.env.tempurl_key)
self.assertEqual(metadata.get('tempurl_key2'), self.env.tempurl_key2)
def test_tempurl_keys_hidden_from_acl_readonly(self):
if not tf.cluster_info.get('tempauth'):
raise SkipTest('TEMP AUTH SPECIFIC TEST')
original_token = self.env.container.conn.storage_token
self.env.container.conn.storage_token = self.env.conn2.storage_token
metadata = self.env.container.info()
self.env.container.conn.storage_token = original_token
self.assertTrue('tempurl_key' not in metadata,
'Container TempURL key found, should not be visible '
'to readonly ACLs')
self.assertTrue('tempurl_key2' not in metadata,
'Container TempURL key-2 found, should not be visible '
'to readonly ACLs')
class TestContainerTempurlUTF8(Base2, TestContainerTempurl):
set_up = False

View File

@ -67,7 +67,7 @@ class BrainSplitter(object):
__metaclass__ = meta_command
def __init__(self, url, token, container_name='test', object_name='test',
server_type='container'):
server_type='container', policy=None):
self.url = url
self.token = token
self.account = utils.split_path(urlparse(url).path, 2, 2)[1]
@ -81,9 +81,26 @@ class BrainSplitter(object):
o = object_name if server_type == 'object' else None
c = container_name if server_type in ('object', 'container') else None
part, nodes = ring.Ring(
'/etc/swift/%s.ring.gz' % server_type).get_nodes(
self.account, c, o)
if server_type in ('container', 'account'):
if policy:
raise TypeError('Metadata server brains do not '
'support specific storage policies')
self.policy = None
self.ring = ring.Ring(
'/etc/swift/%s.ring.gz' % server_type)
elif server_type == 'object':
if not policy:
raise TypeError('Object BrainSplitters need to '
'specify the storage policy')
self.policy = policy
policy.load_ring('/etc/swift')
self.ring = policy.object_ring
else:
raise ValueError('Unknown server_type: %r' % server_type)
self.server_type = server_type
part, nodes = self.ring.get_nodes(self.account, c, o)
node_ids = [n['id'] for n in nodes]
if all(n_id in node_ids for n_id in (0, 1)):
self.primary_numbers = (1, 2)
@ -172,6 +189,8 @@ parser.add_option('-o', '--object', default='object-%s' % uuid.uuid4(),
help='set object name')
parser.add_option('-s', '--server_type', default='container',
help='set server type')
parser.add_option('-P', '--policy_name', default=None,
help='set policy')
def main():
@ -186,8 +205,17 @@ def main():
return 'ERROR: unknown command %s' % cmd
url, token = get_auth('http://127.0.0.1:8080/auth/v1.0',
'test:tester', 'testing')
if options.server_type == 'object' and not options.policy_name:
options.policy_name = POLICIES.default.name
if options.policy_name:
options.server_type = 'object'
policy = POLICIES.get_by_name(options.policy_name)
if not policy:
return 'ERROR: unknown policy %r' % options.policy_name
else:
policy = None
brain = BrainSplitter(url, token, options.container, options.object,
options.server_type)
options.server_type, policy=policy)
for cmd_args in commands:
parts = cmd_args.split(':', 1)
command = parts[0]

View File

@ -24,15 +24,19 @@ from nose import SkipTest
from swiftclient import get_auth, head_account
from swift.obj.diskfile import get_data_dir
from swift.common.ring import Ring
from swift.common.utils import readconf
from swift.common.manager import Manager
from swift.common.storage_policy import POLICIES
from swift.common.storage_policy import POLICIES, EC_POLICY, REPL_POLICY
from test.probe import CHECK_SERVER_TIMEOUT, VALIDATE_RSYNC
ENABLED_POLICIES = [p for p in POLICIES if not p.is_deprecated]
POLICIES_BY_TYPE = defaultdict(list)
for p in POLICIES:
POLICIES_BY_TYPE[p.policy_type].append(p)
def get_server_number(port, port2server):
@ -138,6 +142,17 @@ def kill_nonprimary_server(primary_nodes, port2server, pids):
return port
def build_port_to_conf(server):
# map server to config by port
port_to_config = {}
for server_ in Manager([server]):
for config_path in server_.conf_files():
conf = readconf(config_path,
section_name='%s-replicator' % server_.type)
port_to_config[int(conf['bind_port'])] = conf
return port_to_config
def get_ring(ring_name, required_replicas, required_devices,
server=None, force_validate=None):
if not server:
@ -152,13 +167,7 @@ def get_ring(ring_name, required_replicas, required_devices,
if len(ring.devs) != required_devices:
raise SkipTest('%s has %s devices instead of %s' % (
ring.serialized_path, len(ring.devs), required_devices))
# map server to config by port
port_to_config = {}
for server_ in Manager([server]):
for config_path in server_.conf_files():
conf = readconf(config_path,
section_name='%s-replicator' % server_.type)
port_to_config[int(conf['bind_port'])] = conf
port_to_config = build_port_to_conf(server)
for dev in ring.devs:
# verify server is exposing mounted device
conf = port_to_config[dev['port']]
@ -198,7 +207,8 @@ def get_ring(ring_name, required_replicas, required_devices,
def get_policy(**kwargs):
kwargs.setdefault('is_deprecated', False)
# go thru the policies and make sure they match the requirements of kwargs
# go through the policies and make sure they match the
# requirements of kwargs
for policy in POLICIES:
# TODO: for EC, pop policy type here and check it first
matches = True
@ -261,6 +271,10 @@ class ProbeTest(unittest.TestCase):
['account-replicator', 'container-replicator',
'object-replicator'])
self.updaters = Manager(['container-updater', 'object-updater'])
self.server_port_to_conf = {}
# get some configs backend daemon configs loaded up
for server in ('account', 'container', 'object'):
self.server_port_to_conf[server] = build_port_to_conf(server)
except BaseException:
try:
raise
@ -273,6 +287,23 @@ class ProbeTest(unittest.TestCase):
def tearDown(self):
Manager(['all']).kill()
def device_dir(self, server, node):
conf = self.server_port_to_conf[server][node['port']]
return os.path.join(conf['devices'], node['device'])
def storage_dir(self, server, node, part=None, policy=None):
policy = policy or self.policy
device_path = self.device_dir(server, node)
path_parts = [device_path, get_data_dir(policy)]
if part is not None:
path_parts.append(str(part))
return os.path.join(*path_parts)
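# e.g. (hypothetical node/part values) storage_dir('object', node, part=123)
# -> <devices>/<device>/objects-1/123 for a policy with index 1, or
# <devices>/<device>/objects/123 for policy 0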
def config_number(self, node):
_server_type, config_number = get_server_number(
node['port'], self.port2server)
return config_number
def get_to_final_state(self):
# these .stop()s are probably not strictly necessary,
# but may prevent race conditions
@ -290,7 +321,16 @@ class ReplProbeTest(ProbeTest):
acct_cont_required_devices = 4
obj_required_replicas = 3
obj_required_devices = 4
policy_requirements = {'is_default': True}
policy_requirements = {'policy_type': REPL_POLICY}
class ECProbeTest(ProbeTest):
acct_cont_required_replicas = 3
acct_cont_required_devices = 4
obj_required_replicas = 6
obj_required_devices = 8
policy_requirements = {'policy_type': EC_POLICY}
if __name__ == "__main__":

View File

@ -26,7 +26,8 @@ from swift.common import utils, direct_client
from swift.common.storage_policy import POLICIES
from swift.common.http import HTTP_NOT_FOUND
from test.probe.brain import BrainSplitter
from test.probe.common import ReplProbeTest, ENABLED_POLICIES
from test.probe.common import (ReplProbeTest, ENABLED_POLICIES,
POLICIES_BY_TYPE, REPL_POLICY)
from swiftclient import client, ClientException
@ -234,6 +235,18 @@ class TestContainerMergePolicyIndex(ReplProbeTest):
orig_policy_index, node))
def test_reconcile_manifest(self):
# this test is not only testing a split-brain scenario across
# multiple policies with mis-placed objects - it also writes a
# static large object directly to the storage nodes while the
# objects are unavailably mis-placed from *behind* the proxy, and
# it doesn't know how to do that for EC_POLICY (clayg: why did you
# let me write a test that does this!?) - so we force wrong_policy
# (where the manifest gets written) to be one of your configured
# REPL_POLICY policies (we know you have at least one because this
# is a ReplProbeTest)
wrong_policy = random.choice(POLICIES_BY_TYPE[REPL_POLICY])
policy = random.choice([p for p in ENABLED_POLICIES
if p is not wrong_policy])
manifest_data = []
def write_part(i):
@ -250,17 +263,14 @@ class TestContainerMergePolicyIndex(ReplProbeTest):
# get an old container stashed
self.brain.stop_primary_half()
policy = random.choice(ENABLED_POLICIES)
self.brain.put_container(policy.idx)
self.brain.put_container(int(policy))
self.brain.start_primary_half()
# write some parts
for i in range(10):
write_part(i)
self.brain.stop_handoff_half()
wrong_policy = random.choice([p for p in ENABLED_POLICIES
if p is not policy])
self.brain.put_container(wrong_policy.idx)
self.brain.put_container(int(wrong_policy))
# write some more parts
for i in range(10, 20):
write_part(i)

View File

@ -44,7 +44,9 @@ class TestEmptyDevice(ReplProbeTest):
def test_main(self):
# Create container
container = 'container-%s' % uuid4()
client.put_container(self.url, self.token, container)
client.put_container(self.url, self.token, container,
headers={'X-Storage-Policy':
self.policy.name})
cpart, cnodes = self.container_ring.get_nodes(self.account, container)
cnode = cnodes[0]
@ -58,7 +60,7 @@ class TestEmptyDevice(ReplProbeTest):
# Delete the default data directory for objects on the primary server
obj_dir = '%s/%s' % (self._get_objects_dir(onode),
get_data_dir(self.policy.idx))
get_data_dir(self.policy))
shutil.rmtree(obj_dir, True)
self.assertFalse(os.path.exists(obj_dir))

View File

@ -108,7 +108,9 @@ class TestUpdateOverrides(ReplProbeTest):
'X-Backend-Container-Update-Override-Etag': 'override-etag',
'X-Backend-Container-Update-Override-Content-Type': 'override-type'
}
client.put_container(self.url, self.token, 'c1')
client.put_container(self.url, self.token, 'c1',
headers={'X-Storage-Policy':
self.policy.name})
self.int_client.upload_object(StringIO(u'stuff'), self.account,
'c1', 'o1', headers)

View File

@ -52,7 +52,9 @@ def get_data_file_path(obj_dir):
class TestObjectFailures(ReplProbeTest):
def _setup_data_file(self, container, obj, data):
client.put_container(self.url, self.token, container)
client.put_container(self.url, self.token, container,
headers={'X-Storage-Policy':
self.policy.name})
client.put_object(self.url, self.token, container, obj, data)
odata = client.get_object(self.url, self.token, container, obj)[-1]
self.assertEquals(odata, data)
@ -65,7 +67,7 @@ class TestObjectFailures(ReplProbeTest):
obj_server_conf = readconf(self.configs['object-server'][node_id])
devices = obj_server_conf['app:object-server']['devices']
obj_dir = '%s/%s/%s/%s/%s/%s/' % (devices, device,
get_data_dir(self.policy.idx),
get_data_dir(self.policy),
opart, hash_str[-3:], hash_str)
data_file = get_data_file_path(obj_dir)
return onode, opart, data_file

View File

@ -30,7 +30,9 @@ class TestObjectHandoff(ReplProbeTest):
def test_main(self):
# Create container
container = 'container-%s' % uuid4()
client.put_container(self.url, self.token, container)
client.put_container(self.url, self.token, container,
headers={'X-Storage-Policy':
self.policy.name})
# Kill one container/obj primary server
cpart, cnodes = self.container_ring.get_nodes(self.account, container)

View File

@ -73,7 +73,8 @@ class Test(ReplProbeTest):
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
self.brain = BrainSplitter(self.url, self.token, self.container_name,
self.object_name, 'object')
self.object_name, 'object',
policy=self.policy)
self.tempdir = mkdtemp()
conf_path = os.path.join(self.tempdir, 'internal_client.conf')
conf_body = """
@ -128,7 +129,7 @@ class Test(ReplProbeTest):
self.object_name)
def test_object_delete_is_replicated(self):
self.brain.put_container(policy_index=0)
self.brain.put_container(policy_index=int(self.policy))
# put object
self._put_object()
@ -174,7 +175,7 @@ class Test(ReplProbeTest):
def test_sysmeta_after_replication_with_subsequent_post(self):
sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'}
usermeta = {'x-object-meta-bar': 'meta-bar'}
self.brain.put_container(policy_index=0)
self.brain.put_container(policy_index=int(self.policy))
# put object
self._put_object()
# put newer object with sysmeta to first server subset
@ -221,7 +222,7 @@ class Test(ReplProbeTest):
def test_sysmeta_after_replication_with_prior_post(self):
sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'}
usermeta = {'x-object-meta-bar': 'meta-bar'}
self.brain.put_container(policy_index=0)
self.brain.put_container(policy_index=int(self.policy))
# put object
self._put_object()

View File

@ -0,0 +1,157 @@
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hashlib import md5
import unittest
import uuid
import random
import os
import errno
from test.probe.common import ECProbeTest
from swift.common import direct_client
from swift.common.storage_policy import EC_POLICY
from swift.common.manager import Manager
from swiftclient import client
class Body(object):
def __init__(self, total=3.5 * 2 ** 20):
self.total = total
self.hasher = md5()
self.size = 0
self.chunk = 'test' * 16 * 2 ** 10
@property
def etag(self):
return self.hasher.hexdigest()
def __iter__(self):
return self
def next(self):
if self.size > self.total:
raise StopIteration()
self.size += len(self.chunk)
self.hasher.update(self.chunk)
return self.chunk
def __next__(self):
return self.next()
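For orientation, a minimal sketch of how this helper behaves (standalone apart from the Body class above and the md5 import at the top of the file): it yields whole 64 KiB chunks until the requested total is exceeded, and its etag matches the streamed bytes.

    body = Body(total=5 * 65536)
    streamed = ''.join(body)                  # iterates in 64 KiB chunks
    assert len(streamed) >= body.total        # always a whole number of chunks
    assert md5(streamed).hexdigest() == body.etag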
class TestReconstructorPropDurable(ECProbeTest):
def setUp(self):
super(TestReconstructorPropDurable, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
# sanity
self.assertEqual(self.policy.policy_type, EC_POLICY)
self.reconstructor = Manager(["object-reconstructor"])
def direct_get(self, node, part):
req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
headers, data = direct_client.direct_get_object(
node, part, self.account, self.container_name,
self.object_name, headers=req_headers,
resp_chunk_size=64 * 2 ** 20)
hasher = md5()
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def _check_node(self, node, part, etag, headers_post):
# get fragment archive etag
fragment_archive_etag = self.direct_get(node, part)
# remove the .durable from the selected node
part_dir = self.storage_dir('object', node, part=part)
for dirs, subdirs, files in os.walk(part_dir):
for fname in files:
if fname.endswith('.durable'):
durable = os.path.join(dirs, fname)
os.remove(durable)
break
try:
os.remove(os.path.join(part_dir, 'hashes.pkl'))
except OSError as e:
if e.errno != errno.ENOENT:
raise
# fire up the reconstructor to propagate the .durable
self.reconstructor.once()
# fragment is still exactly as it was before!
self.assertEqual(fragment_archive_etag,
self.direct_get(node, part))
# check meta
meta = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
for key in headers_post:
self.assertTrue(key in meta)
self.assertEqual(meta[key], headers_post[key])
def _format_node(self, node):
return '%s#%s' % (node['device'], node['index'])
def test_main(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# PUT object
contents = Body()
headers = {'x-object-meta-foo': 'meta-foo'}
headers_post = {'x-object-meta-bar': 'meta-bar'}
etag = client.put_object(self.url, self.token,
self.container_name,
self.object_name,
contents=contents, headers=headers)
client.post_object(self.url, self.token, self.container_name,
self.object_name, headers=headers_post)
del headers_post['X-Auth-Token']  # swiftclient adds this to the dict we passed in
# build up a list of node lists to kill a .durable from:
# first try a single node, then adjacent nodes, and then
# nodes more than one apart
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
single_node = [random.choice(onodes)]
adj_nodes = [onodes[0], onodes[-1]]
far_nodes = [onodes[0], onodes[-2]]
test_list = [single_node, adj_nodes, far_nodes]
for node_list in test_list:
for onode in node_list:
try:
self._check_node(onode, opart, etag, headers_post)
except AssertionError as e:
self.fail(
str(e) + '\n... for node %r of scenario %r' % (
self._format_node(onode),
[self._format_node(n) for n in node_list]))
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,170 @@
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hashlib import md5
import unittest
import uuid
import shutil
import random
from test.probe.common import ECProbeTest
from swift.common import direct_client
from swift.common.storage_policy import EC_POLICY
from swift.common.manager import Manager
from swiftclient import client
class Body(object):
def __init__(self, total=3.5 * 2 ** 20):
self.total = total
self.hasher = md5()
self.size = 0
self.chunk = 'test' * 16 * 2 ** 10
@property
def etag(self):
return self.hasher.hexdigest()
def __iter__(self):
return self
def next(self):
if self.size > self.total:
raise StopIteration()
self.size += len(self.chunk)
self.hasher.update(self.chunk)
return self.chunk
def __next__(self):
return self.next()
class TestReconstructorRebuild(ECProbeTest):
def setUp(self):
super(TestReconstructorRebuild, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
# sanity
self.assertEqual(self.policy.policy_type, EC_POLICY)
self.reconstructor = Manager(["object-reconstructor"])
def proxy_get(self):
# GET object
headers, body = client.get_object(self.url, self.token,
self.container_name,
self.object_name,
resp_chunk_size=64 * 2 ** 10)
resp_checksum = md5()
for chunk in body:
resp_checksum.update(chunk)
return resp_checksum.hexdigest()
def direct_get(self, node, part):
req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
headers, data = direct_client.direct_get_object(
node, part, self.account, self.container_name,
self.object_name, headers=req_headers,
resp_chunk_size=64 * 2 ** 20)
hasher = md5()
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def _check_node(self, node, part, etag, headers_post):
# get fragment archive etag
fragment_archive_etag = self.direct_get(node, part)
# remove data from the selected node
part_dir = self.storage_dir('object', node, part=part)
shutil.rmtree(part_dir, True)
# this node can't serve the data any more
try:
self.direct_get(node, part)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(node,))
# make sure we can still GET the object and it's correct; the
# proxy decodes the remaining fragments to rebuild the obj
self.assertEqual(etag, self.proxy_get())
# fire up reconstructor
self.reconstructor.once()
# fragment is rebuilt exactly as it was before!
self.assertEqual(fragment_archive_etag,
self.direct_get(node, part))
# check meta
meta = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
for key in headers_post:
self.assertTrue(key in meta)
self.assertEqual(meta[key], headers_post[key])
def _format_node(self, node):
return '%s#%s' % (node['device'], node['index'])
def test_main(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# PUT object
contents = Body()
headers = {'x-object-meta-foo': 'meta-foo'}
headers_post = {'x-object-meta-bar': 'meta-bar'}
etag = client.put_object(self.url, self.token,
self.container_name,
self.object_name,
contents=contents, headers=headers)
client.post_object(self.url, self.token, self.container_name,
self.object_name, headers=headers_post)
del headers_post['X-Auth-Token']  # swiftclient adds this to the dict we passed in
# build up a list of node lists to kill data from:
# first try a single node, then adjacent nodes, and then
# nodes more than one apart
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
single_node = [random.choice(onodes)]
adj_nodes = [onodes[0], onodes[-1]]
far_nodes = [onodes[0], onodes[-2]]
test_list = [single_node, adj_nodes, far_nodes]
for node_list in test_list:
for onode in node_list:
try:
self._check_node(onode, opart, etag, headers_post)
except AssertionError as e:
self.fail(
str(e) + '\n... for node %r of scenario %r' % (
self._format_node(onode),
[self._format_node(n) for n in node_list]))
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,376 @@
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hashlib import md5
import unittest
import uuid
import os
import random
import shutil
from collections import defaultdict
from test.probe.common import ECProbeTest
from swift.common import direct_client
from swift.common.storage_policy import EC_POLICY
from swift.common.manager import Manager
from swift.common.utils import renamer
from swift.obj import reconstructor
from swiftclient import client
class Body(object):
def __init__(self, total=3.5 * 2 ** 20):
self.total = total
self.hasher = md5()
self.size = 0
self.chunk = 'test' * 16 * 2 ** 10
@property
def etag(self):
return self.hasher.hexdigest()
def __iter__(self):
return self
def next(self):
if self.size > self.total:
raise StopIteration()
self.size += len(self.chunk)
self.hasher.update(self.chunk)
return self.chunk
def __next__(self):
return self.next()
class TestReconstructorRevert(ECProbeTest):
def setUp(self):
super(TestReconstructorRevert, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
# sanity
self.assertEqual(self.policy.policy_type, EC_POLICY)
self.reconstructor = Manager(["object-reconstructor"])
def kill_drive(self, device):
if os.path.ismount(device):
os.system('sudo umount %s' % device)
else:
renamer(device, device + "X")
def revive_drive(self, device):
disabled_name = device + "X"
if os.path.isdir(disabled_name):
renamer(device + "X", device)
else:
os.system('sudo mount %s' % device)
def proxy_get(self):
# GET object
headers, body = client.get_object(self.url, self.token,
self.container_name,
self.object_name,
resp_chunk_size=64 * 2 ** 10)
resp_checksum = md5()
for chunk in body:
resp_checksum.update(chunk)
return resp_checksum.hexdigest()
def direct_get(self, node, part):
req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
headers, data = direct_client.direct_get_object(
node, part, self.account, self.container_name,
self.object_name, headers=req_headers,
resp_chunk_size=64 * 2 ** 20)
hasher = md5()
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def test_revert_object(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# get our node lists
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
hnodes = self.object_ring.get_more_nodes(opart)
# kill two primary nodes (a parity count's worth) so we can force
# data onto handoffs; we do that by renaming dev dirs to induce 507
p_dev1 = self.device_dir('object', onodes[0])
p_dev2 = self.device_dir('object', onodes[1])
self.kill_drive(p_dev1)
self.kill_drive(p_dev2)
# PUT object
contents = Body()
headers = {'x-object-meta-foo': 'meta-foo'}
headers_post = {'x-object-meta-bar': 'meta-bar'}
client.put_object(self.url, self.token, self.container_name,
self.object_name, contents=contents,
headers=headers)
client.post_object(self.url, self.token, self.container_name,
self.object_name, headers=headers_post)
del headers_post['X-Auth-Token']  # swiftclient adds this to the dict we passed in
# these primaries can't serve the data any more; we expect 507
# here and not 404 because we're using mount_check to kill nodes
for onode in (onodes[0], onodes[1]):
try:
self.direct_get(onode, opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 507)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onode,))
# now take out another primary
p_dev3 = self.device_dir('object', onodes[2])
self.kill_drive(p_dev3)
# this node can't serve the data any more
try:
self.direct_get(onodes[2], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 507)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onode,))
# make sure we can still GET the object and it's correct;
# we're now pulling from handoffs and reconstructing
etag = self.proxy_get()
self.assertEqual(etag, contents.etag)
# rename the dev dirs so they don't 507 anymore
self.revive_drive(p_dev1)
self.revive_drive(p_dev2)
self.revive_drive(p_dev3)
# fire up reconstructor on handoff nodes only
for hnode in hnodes:
hnode_id = (hnode['port'] - 6000) / 10
self.reconstructor.once(number=hnode_id)
# first three primaries have data again
for onode in (onodes[0], onodes[2]):
self.direct_get(onode, opart)
# check meta
meta = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
for key in headers_post:
self.assertTrue(key in meta)
self.assertEqual(meta[key], headers_post[key])
# handoffs are empty
for hnode in hnodes:
try:
self.direct_get(hnode, opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(hnode,))
def test_delete_propogate(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# get our node lists
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
hnodes = self.object_ring.get_more_nodes(opart)
p_dev2 = self.device_dir('object', onodes[1])
# PUT object
contents = Body()
client.put_object(self.url, self.token, self.container_name,
self.object_name, contents=contents)
# now lets shut one down
self.kill_drive(p_dev2)
# delete on the ones that are left
client.delete_object(self.url, self.token,
self.container_name,
self.object_name)
# spot check a node
try:
self.direct_get(onodes[0], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onodes[0],))
# re-enable the downed drive
self.revive_drive(p_dev2)
# propagate the delete...
# fire up reconstructor on handoff nodes only
for hnode in hnodes:
hnode_id = (hnode['port'] - 6000) / 10
self.reconstructor.once(number=hnode_id)
# check the revived node to make sure it's gone
try:
self.direct_get(onodes[1], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onodes[1],))
# make sure proxy get can't find it
try:
self.proxy_get()
except Exception as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Proxy GET of deleted object %r unexpectedly succeeded!' %
(self.object_name,))
def test_reconstruct_from_reverted_fragment_archive(self):
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# get our node lists
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
# find a primary server that only has one of its devices in the
# primary node list
group_nodes_by_config = defaultdict(list)
for n in onodes:
group_nodes_by_config[self.config_number(n)].append(n)
for config_number, node_list in group_nodes_by_config.items():
if len(node_list) == 1:
break
else:
self.fail('ring balancing did not use all available nodes')
primary_node = node_list[0]
primary_device = self.device_dir('object', primary_node)
self.kill_drive(primary_device)
# PUT object
contents = Body()
etag = client.put_object(self.url, self.token, self.container_name,
self.object_name, contents=contents)
self.assertEqual(contents.etag, etag)
# fix the primary device and sanity GET
self.revive_drive(primary_device)
self.assertEqual(etag, self.proxy_get())
# find a handoff holding the fragment
for hnode in self.object_ring.get_more_nodes(opart):
try:
reverted_fragment_etag = self.direct_get(hnode, opart)
except direct_client.DirectClientException as err:
if err.http_status != 404:
raise
else:
break
else:
self.fail('Unable to find handoff fragment!')
# we'll force the handoff device to revert instead of potentially
# racing with rebuild by deleting any other fragments that may be on
# the same server
handoff_fragment_etag = None
for node in onodes:
if node['port'] == hnode['port']:
# we'll keep track of the etag of this fragment we're removing
# in case we need it later (cue foreshadowing music)...
try:
handoff_fragment_etag = self.direct_get(node, opart)
except direct_client.DirectClientException as err:
if err.http_status != 404:
raise
# this just means our handoff device was on the same
# machine as the primary!
continue
# use the primary nodes device - not the hnode device
part_dir = self.storage_dir('object', node, part=opart)
shutil.rmtree(part_dir, True)
# revert from handoff device with reconstructor
self.reconstructor.once(number=self.config_number(hnode))
# verify fragment reverted to primary server
self.assertEqual(reverted_fragment_etag,
self.direct_get(primary_node, opart))
# now we'll remove some data on one of the primary node's partners
partner = random.choice(reconstructor._get_partners(
primary_node['index'], onodes))
try:
rebuilt_fragment_etag = self.direct_get(partner, opart)
except direct_client.DirectClientException as err:
if err.http_status != 404:
raise
# partner already had its fragment removed
if (handoff_fragment_etag is not None and
hnode['port'] == partner['port']):
# oh, well that makes sense then...
rebuilt_fragment_etag = handoff_fragment_etag
else:
# I wonder what happened?
self.fail('Partner inexplicably missing fragment!')
part_dir = self.storage_dir('object', partner, part=opart)
shutil.rmtree(part_dir, True)
# sanity, it's gone
try:
self.direct_get(partner, opart)
except direct_client.DirectClientException as err:
if err.http_status != 404:
raise
else:
self.fail('successful GET of removed partner fragment archive!?')
# and force the primary node to do a rebuild
self.reconstructor.once(number=self.config_number(primary_node))
# and validate the partner's rebuilt_fragment_etag
try:
self.assertEqual(rebuilt_fragment_etag,
self.direct_get(partner, opart))
except direct_client.DirectClientException as err:
if err.http_status != 404:
raise
else:
self.fail('Did not find rebuilt fragment on partner node')
if __name__ == "__main__":
unittest.main()

View File

@ -21,7 +21,6 @@ import time
import shutil
from swiftclient import client
from swift.common.storage_policy import POLICIES
from swift.obj.diskfile import get_data_dir
from test.probe.common import ReplProbeTest
@ -88,7 +87,7 @@ class TestReplicatorFunctions(ReplProbeTest):
# Delete file "hashes.pkl".
# Check, that all files were replicated.
path_list = []
data_dir = get_data_dir(POLICIES.default.idx)
data_dir = get_data_dir(self.policy)
# Figure out where the devices are
for node_id in range(1, 5):
conf = readconf(self.configs['object-server'][node_id])
@ -100,7 +99,9 @@ class TestReplicatorFunctions(ReplProbeTest):
# Put data to storage nodes
container = 'container-%s' % uuid4()
client.put_container(self.url, self.token, container)
client.put_container(self.url, self.token, container,
headers={'X-Storage-Policy':
self.policy.name})
obj = 'object-%s' % uuid4()
client.put_object(self.url, self.token, container, obj, 'VERIFY')

View File

@ -22,24 +22,30 @@ import errno
import sys
from contextlib import contextmanager, closing
from collections import defaultdict, Iterable
import itertools
from numbers import Number
from tempfile import NamedTemporaryFile
import time
import eventlet
from eventlet.green import socket
from tempfile import mkdtemp
from shutil import rmtree
from swift.common.utils import Timestamp
from test import get_config
from swift.common import swob, utils
from swift.common.ring import Ring, RingData
from hashlib import md5
from eventlet import sleep, Timeout
import logging.handlers
from httplib import HTTPException
from swift.common import storage_policy
from swift.common.storage_policy import StoragePolicy, ECStoragePolicy
import functools
import cPickle as pickle
from gzip import GzipFile
import mock as mocklib
import inspect
EMPTY_ETAG = md5().hexdigest()
# try not to import this module from swift
if not os.path.basename(sys.argv[0]).startswith('swift'):
@ -47,26 +53,40 @@ if not os.path.basename(sys.argv[0]).startswith('swift'):
utils.HASH_PATH_SUFFIX = 'endcap'
def patch_policies(thing_or_policies=None, legacy_only=False):
if legacy_only:
default_policies = [storage_policy.StoragePolicy(
0, 'legacy', True, object_ring=FakeRing())]
else:
default_policies = [
storage_policy.StoragePolicy(
0, 'nulo', True, object_ring=FakeRing()),
storage_policy.StoragePolicy(
1, 'unu', object_ring=FakeRing()),
]
thing_or_policies = thing_or_policies or default_policies
def patch_policies(thing_or_policies=None, legacy_only=False,
with_ec_default=False, fake_ring_args=None):
if isinstance(thing_or_policies, (
Iterable, storage_policy.StoragePolicyCollection)):
return PatchPolicies(thing_or_policies)
return PatchPolicies(thing_or_policies, fake_ring_args=fake_ring_args)
if legacy_only:
default_policies = [
StoragePolicy(0, name='legacy', is_default=True),
]
default_ring_args = [{}]
elif with_ec_default:
default_policies = [
ECStoragePolicy(0, name='ec', is_default=True,
ec_type='jerasure_rs_vand', ec_ndata=10,
ec_nparity=4, ec_segment_size=4096),
StoragePolicy(1, name='unu'),
]
default_ring_args = [{'replicas': 14}, {}]
else:
# it's a thing!
return PatchPolicies(default_policies)(thing_or_policies)
default_policies = [
StoragePolicy(0, name='nulo', is_default=True),
StoragePolicy(1, name='unu'),
]
default_ring_args = [{}, {}]
fake_ring_args = fake_ring_args or default_ring_args
decorator = PatchPolicies(default_policies, fake_ring_args=fake_ring_args)
if not thing_or_policies:
return decorator
else:
# it's a thing, we return the wrapped thing instead of the decorator
return decorator(thing_or_policies)
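# A minimal usage sketch of the decorator form above (hypothetical test, not
# part of this diff; imports and class names are invented):
#
#     @patch_policies(with_ec_default=True)
#     class TestSomethingWithEC(unittest.TestCase):
#         def test_default_policy_is_ec(self):
#             self.assertEqual(storage_policy.POLICIES.default.policy_type,
#                              EC_POLICY)
#
# Called with no target, patch_policies(...) returns the PatchPolicies
# decorator; called with a class or function, it wraps that thing directly.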
class PatchPolicies(object):
@ -76,11 +96,33 @@ class PatchPolicies(object):
patched yet)
"""
def __init__(self, policies):
def __init__(self, policies, fake_ring_args=None):
if isinstance(policies, storage_policy.StoragePolicyCollection):
self.policies = policies
else:
self.policies = storage_policy.StoragePolicyCollection(policies)
self.fake_ring_args = fake_ring_args or [None] * len(self.policies)
def _setup_rings(self):
"""
Our tests tend to use the policies' rings like their own personal
playground - which can be a problem in the particular case of a
patched TestCase class, where the FakeRing objects are scoped in the
call to the patch_policies wrapper outside of the TestCase instance,
which can lead to state bleeding between tests.
To help tests get better isolation without having to think about it,
here we're capturing the args required to *build* new FakeRing
instances so we can ensure each test method gets a clean ring setup.
The TestCase can always "tweak" these fresh rings in setUp - or if
they'd prefer to get the same "reset" behavior with custom FakeRings
they can pass in their own fake_ring_args to patch_policies instead of
setting the object_ring on the policy definitions.
"""
for policy, fake_ring_arg in zip(self.policies, self.fake_ring_args):
if fake_ring_arg is not None:
policy.object_ring = FakeRing(**fake_ring_arg)
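# Hypothetical illustration of the isolation described in the docstring above
# (names invented for the sketch): because _setup_rings() runs in setUp, any
# damage a test does to a policy's FakeRing is undone before the next test.
#
#     @patch_policies([StoragePolicy(0, 'zero', True)],
#                     fake_ring_args=[{'replicas': 4}])
#     class TestRingIsolation(unittest.TestCase):
#         def test_scribble_on_ring(self):
#             # rebuilt from fake_ring_args before the next test method runs
#             storage_policy.POLICIES[0].object_ring.max_more_nodes = 0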
def __call__(self, thing):
if isinstance(thing, type):
@ -89,24 +131,33 @@ class PatchPolicies(object):
return self._patch_method(thing)
def _patch_class(self, cls):
"""
Creating a new class that inherits from the decorated class is the more
common way I've seen class decorators done - but it seems to cause
infinite recursion when super is called from inside methods in the
decorated class.
"""
class NewClass(cls):
orig_setUp = cls.setUp
orig_tearDown = cls.tearDown
already_patched = False
def setUp(cls_self):
self._orig_POLICIES = storage_policy._POLICIES
if not getattr(cls_self, '_policies_patched', False):
storage_policy._POLICIES = self.policies
self._setup_rings()
cls_self._policies_patched = True
def setUp(cls_self):
self._orig_POLICIES = storage_policy._POLICIES
if not cls_self.already_patched:
storage_policy._POLICIES = self.policies
cls_self.already_patched = True
super(NewClass, cls_self).setUp()
orig_setUp(cls_self)
def tearDown(cls_self):
super(NewClass, cls_self).tearDown()
storage_policy._POLICIES = self._orig_POLICIES
def tearDown(cls_self):
orig_tearDown(cls_self)
storage_policy._POLICIES = self._orig_POLICIES
NewClass.__name__ = cls.__name__
return NewClass
cls.setUp = setUp
cls.tearDown = tearDown
return cls
def _patch_method(self, f):
@functools.wraps(f)
@ -114,6 +165,7 @@ class PatchPolicies(object):
self._orig_POLICIES = storage_policy._POLICIES
try:
storage_policy._POLICIES = self.policies
self._setup_rings()
return f(*args, **kwargs)
finally:
storage_policy._POLICIES = self._orig_POLICIES
@ -171,14 +223,16 @@ class FakeRing(Ring):
return self.replicas
def _get_part_nodes(self, part):
return list(self._devs)
return [dict(node, index=i) for i, node in enumerate(list(self._devs))]
def get_more_nodes(self, part):
# replicas^2 is the true cap
for x in xrange(self.replicas, min(self.replicas + self.max_more_nodes,
self.replicas * self.replicas)):
yield {'ip': '10.0.0.%s' % x,
'replication_ip': '10.0.0.%s' % x,
'port': self._base_port + x,
'replication_port': self._base_port + x,
'device': 'sda',
'zone': x % 3,
'region': x % 2,
@ -206,6 +260,48 @@ def write_fake_ring(path, *devs):
pickle.dump(RingData(replica2part2dev_id, devs, part_shift), f)
class FabricatedRing(Ring):
"""
When a FakeRing just won't do - you can fabricate one to meet
your test's needs.
"""
def __init__(self, replicas=6, devices=8, nodes=4, port=6000,
part_power=4):
self.devices = devices
self.nodes = nodes
self.port = port
self.replicas = replicas
self.part_power = part_power
self._part_shift = 32 - self.part_power
self._reload()
def _reload(self, *args, **kwargs):
self._rtime = time.time() * 2
if hasattr(self, '_replica2part2dev_id'):
return
self._devs = [{
'region': 1,
'zone': 1,
'weight': 1.0,
'id': i,
'device': 'sda%d' % i,
'ip': '10.0.0.%d' % (i % self.nodes),
'replication_ip': '10.0.0.%d' % (i % self.nodes),
'port': self.port,
'replication_port': self.port,
} for i in range(self.devices)]
self._replica2part2dev_id = [
[None] * 2 ** self.part_power
for i in range(self.replicas)
]
dev_ids = itertools.cycle(range(self.devices))
for p in range(2 ** self.part_power):
for r in range(self.replicas):
self._replica2part2dev_id[r][p] = next(dev_ids)
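# A hedged usage sketch (not part of this diff; names invented): a
# FabricatedRing quacks like a real Ring with a deterministic device table,
# e.g.
#
#     ring = FabricatedRing(replicas=6, devices=8, nodes=4, port=6000)
#     part, nodes = ring.get_nodes('a', 'c', 'o')
#     # nodes are drawn from the eight fabricated 'sda%d' devices spread
#     # across four 10.0.0.x addresses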
class FakeMemcache(object):
def __init__(self):
@ -363,8 +459,8 @@ class UnmockTimeModule(object):
logging.time = UnmockTimeModule()
class FakeLogger(logging.Logger):
# a thread safe logger
class FakeLogger(logging.Logger, object):
# a thread safe fake logger
def __init__(self, *args, **kwargs):
self._clear()
@ -376,22 +472,31 @@ class FakeLogger(logging.Logger):
self.thread_locals = None
self.parent = None
store_in = {
logging.ERROR: 'error',
logging.WARNING: 'warning',
logging.INFO: 'info',
logging.DEBUG: 'debug',
logging.CRITICAL: 'critical',
}
def _log(self, level, msg, *args, **kwargs):
store_name = self.store_in[level]
cargs = [msg]
if any(args):
cargs.extend(args)
captured = dict(kwargs)
if 'exc_info' in kwargs and \
not isinstance(kwargs['exc_info'], tuple):
captured['exc_info'] = sys.exc_info()
self.log_dict[store_name].append((tuple(cargs), captured))
super(FakeLogger, self)._log(level, msg, *args, **kwargs)
def _clear(self):
self.log_dict = defaultdict(list)
self.lines_dict = {'critical': [], 'error': [], 'info': [],
'warning': [], 'debug': []}
def _store_in(store_name):
def stub_fn(self, *args, **kwargs):
self.log_dict[store_name].append((args, kwargs))
return stub_fn
def _store_and_log_in(store_name, level):
def stub_fn(self, *args, **kwargs):
self.log_dict[store_name].append((args, kwargs))
self._log(level, args[0], args[1:], **kwargs)
return stub_fn
def get_lines_for_level(self, level):
if level not in self.lines_dict:
raise KeyError(
@ -404,16 +509,10 @@ class FakeLogger(logging.Logger):
return dict((level, msgs) for level, msgs in self.lines_dict.items()
if len(msgs) > 0)
error = _store_and_log_in('error', logging.ERROR)
info = _store_and_log_in('info', logging.INFO)
warning = _store_and_log_in('warning', logging.WARNING)
warn = _store_and_log_in('warning', logging.WARNING)
debug = _store_and_log_in('debug', logging.DEBUG)
def exception(self, *args, **kwargs):
self.log_dict['exception'].append((args, kwargs,
str(sys.exc_info()[1])))
print 'FakeLogger Exception: %s' % self.log_dict
def _store_in(store_name):
def stub_fn(self, *args, **kwargs):
self.log_dict[store_name].append((args, kwargs))
return stub_fn
# mock out the StatsD logging methods:
update_stats = _store_in('update_stats')
@ -605,19 +704,53 @@ def mock(update):
delattr(module, attr)
class SlowBody(object):
"""
This will work with our fake_http_connect; if you hand one of these in
instead of a string it will make reads take longer by the given
amount. It should be a little bit easier to extend than the
current slow kwarg - which inserts whitespace in the response.
Also it should be easy to detect if you have one of these (or a
subclass) for the body inside of FakeConn if we wanted to do
something smarter than just duck-type the str/buffer api
enough to get by.
"""
def __init__(self, body, slowness):
self.body = body
self.slowness = slowness
def slowdown(self):
eventlet.sleep(self.slowness)
def __getitem__(self, s):
return SlowBody(self.body[s], self.slowness)
def __len__(self):
return len(self.body)
def __radd__(self, other):
self.slowdown()
return other + self.body
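# A hedged sketch of how SlowBody is intended to plug into fake_http_connect
# (status codes and delay are invented):
#
#     new_connect = fake_http_connect(200, 200, 200,
#                                     body=SlowBody('slow chunk', 0.5))
#     # each read of the response body then sleeps 0.5s (via __radd__) before
#     # handing the data back, rather than whitespace-padding the response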
def fake_http_connect(*code_iter, **kwargs):
class FakeConn(object):
def __init__(self, status, etag=None, body='', timestamp='1',
headers=None):
headers=None, expect_headers=None, connection_id=None,
give_send=None):
# connect exception
if isinstance(status, (Exception, Timeout)):
if isinstance(status, (Exception, eventlet.Timeout)):
raise status
if isinstance(status, tuple):
self.expect_status, self.status = status
self.expect_status = list(status[:-1])
self.status = status[-1]
self.explicit_expect_list = True
else:
self.expect_status, self.status = (None, status)
self.expect_status, self.status = ([], status)
self.explicit_expect_list = False
if not self.expect_status:
# when a swift backend service returns a status before reading
# from the body (mostly an error response) eventlet.wsgi will
@ -628,9 +761,9 @@ def fake_http_connect(*code_iter, **kwargs):
# our backend services and return certain types of responses
# as expect statuses just like a real backend server would do.
if self.status in (507, 412, 409):
self.expect_status = status
self.expect_status = [status]
else:
self.expect_status = 100
self.expect_status = [100, 100]
self.reason = 'Fake'
self.host = '1.2.3.4'
self.port = '1234'
@ -639,32 +772,41 @@ def fake_http_connect(*code_iter, **kwargs):
self.etag = etag
self.body = body
self.headers = headers or {}
self.expect_headers = expect_headers or {}
self.timestamp = timestamp
self.connection_id = connection_id
self.give_send = give_send
if 'slow' in kwargs and isinstance(kwargs['slow'], list):
try:
self._next_sleep = kwargs['slow'].pop(0)
except IndexError:
self._next_sleep = None
# be nice to tricksy bits with node_iters
eventlet.sleep()
def getresponse(self):
if isinstance(self.status, (Exception, Timeout)):
if self.expect_status and self.explicit_expect_list:
raise Exception('Test did not consume all fake '
'expect status: %r' % (self.expect_status,))
if isinstance(self.status, (Exception, eventlet.Timeout)):
raise self.status
exc = kwargs.get('raise_exc')
if exc:
if isinstance(exc, (Exception, Timeout)):
if isinstance(exc, (Exception, eventlet.Timeout)):
raise exc
raise Exception('test')
if kwargs.get('raise_timeout_exc'):
raise Timeout()
raise eventlet.Timeout()
return self
def getexpect(self):
if isinstance(self.expect_status, (Exception, Timeout)):
expect_status = self.expect_status.pop(0)
if isinstance(self.expect_status, (Exception, eventlet.Timeout)):
raise self.expect_status
headers = {}
if self.expect_status == 409:
headers = dict(self.expect_headers)
if expect_status == 409:
headers['X-Backend-Timestamp'] = self.timestamp
return FakeConn(self.expect_status, headers=headers)
return FakeConn(expect_status, headers=headers)
def getheaders(self):
etag = self.etag
@ -717,34 +859,45 @@ def fake_http_connect(*code_iter, **kwargs):
if am_slow:
if self.sent < 4:
self.sent += 1
sleep(value)
eventlet.sleep(value)
return ' '
rv = self.body[:amt]
self.body = self.body[amt:]
return rv
def send(self, amt=None):
if self.give_send:
self.give_send(self.connection_id, amt)
am_slow, value = self.get_slow()
if am_slow:
if self.received < 4:
self.received += 1
sleep(value)
eventlet.sleep(value)
def getheader(self, name, default=None):
return swob.HeaderKeyDict(self.getheaders()).get(name, default)
def close(self):
pass
timestamps_iter = iter(kwargs.get('timestamps') or ['1'] * len(code_iter))
etag_iter = iter(kwargs.get('etags') or [None] * len(code_iter))
if isinstance(kwargs.get('headers'), list):
if isinstance(kwargs.get('headers'), (list, tuple)):
headers_iter = iter(kwargs['headers'])
else:
headers_iter = iter([kwargs.get('headers', {})] * len(code_iter))
if isinstance(kwargs.get('expect_headers'), (list, tuple)):
expect_headers_iter = iter(kwargs['expect_headers'])
else:
expect_headers_iter = iter([kwargs.get('expect_headers', {})] *
len(code_iter))
x = kwargs.get('missing_container', [False] * len(code_iter))
if not isinstance(x, (tuple, list)):
x = [x] * len(code_iter)
container_ts_iter = iter(x)
code_iter = iter(code_iter)
conn_id_and_code_iter = enumerate(code_iter)
static_body = kwargs.get('body', None)
body_iter = kwargs.get('body_iter', None)
if body_iter:
@ -752,17 +905,22 @@ def fake_http_connect(*code_iter, **kwargs):
def connect(*args, **ckwargs):
if kwargs.get('slow_connect', False):
sleep(0.1)
eventlet.sleep(0.1)
if 'give_content_type' in kwargs:
if len(args) >= 7 and 'Content-Type' in args[6]:
kwargs['give_content_type'](args[6]['Content-Type'])
else:
kwargs['give_content_type']('')
i, status = conn_id_and_code_iter.next()
if 'give_connect' in kwargs:
kwargs['give_connect'](*args, **ckwargs)
status = code_iter.next()
give_conn_fn = kwargs['give_connect']
argspec = inspect.getargspec(give_conn_fn)
if argspec.keywords or 'connection_id' in argspec.args:
ckwargs['connection_id'] = i
give_conn_fn(*args, **ckwargs)
etag = etag_iter.next()
headers = headers_iter.next()
expect_headers = expect_headers_iter.next()
timestamp = timestamps_iter.next()
if status <= 0:
@ -772,7 +930,8 @@ def fake_http_connect(*code_iter, **kwargs):
else:
body = body_iter.next()
return FakeConn(status, etag, body=body, timestamp=timestamp,
headers=headers)
headers=headers, expect_headers=expect_headers,
connection_id=i, give_send=kwargs.get('give_send'))
connect.code_iter = code_iter
@ -803,3 +962,7 @@ def mocked_http_conn(*args, **kwargs):
left_over_status = list(fake_conn.code_iter)
if left_over_status:
raise AssertionError('left over status %r' % left_over_status)
def make_timestamp_iter():
return iter(Timestamp(t) for t in itertools.count(int(time.time())))
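# e.g. (sketch): ts_iter = make_timestamp_iter(); successive next(ts_iter)
# calls yield strictly increasing, whole-second Timestamps - handy when a
# test needs a deterministic ordering of X-Timestamp values.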

View File

@ -747,7 +747,7 @@ def prespi_AccountBroker_initialize(self, conn, put_timestamp, **kwargs):
The AccountBroker initialize() function before we added the
policy stat table. Used by test_policy_table_creation() to
make sure that the AccountBroker will correctly add the table
for cases where the DB existed before the policy suport was added.
for cases where the DB existed before the policy support was added.
:param conn: DB connection object
:param put_timestamp: put timestamp

View File

@ -141,7 +141,7 @@ cont_nodes = [{'device': 'sda1',
@unit.patch_policies([StoragePolicy(0, 'zero', False,
object_ring=unit.FakeRing()),
StoragePolicy(1, 'one', True,
object_ring=unit.FakeRing())])
object_ring=unit.FakeRing(replicas=4))])
class TestReaper(unittest.TestCase):
def setUp(self):
@ -215,7 +215,7 @@ class TestReaper(unittest.TestCase):
r.stats_objects_possibly_remaining = 0
r.myips = myips
if fakelogger:
r.logger = FakeLogger()
r.logger = unit.debug_logger('test-reaper')
return r
def fake_reap_account(self, *args, **kwargs):
@ -287,7 +287,7 @@ class TestReaper(unittest.TestCase):
policy.idx)
for i, call_args in enumerate(
fake_direct_delete.call_args_list):
cnode = cont_nodes[i]
cnode = cont_nodes[i % len(cont_nodes)]
host = '%(ip)s:%(port)s' % cnode
device = cnode['device']
headers = {
@ -297,11 +297,13 @@ class TestReaper(unittest.TestCase):
'X-Backend-Storage-Policy-Index': policy.idx
}
ring = r.get_object_ring(policy.idx)
expected = call(ring.devs[i], 0, 'a', 'c', 'o',
expected = call(dict(ring.devs[i], index=i), 0,
'a', 'c', 'o',
headers=headers, conn_timeout=0.5,
response_timeout=10)
self.assertEqual(call_args, expected)
self.assertEqual(r.stats_objects_deleted, 3)
self.assertEqual(r.stats_objects_deleted,
policy.object_ring.replicas)
def test_reap_object_fail(self):
r = self.init_reaper({}, fakelogger=True)
@ -312,7 +314,26 @@ class TestReaper(unittest.TestCase):
self.fake_direct_delete_object):
r.reap_object('a', 'c', 'partition', cont_nodes, 'o',
policy.idx)
self.assertEqual(r.stats_objects_deleted, 1)
# IMHO, the stat handling in the node loop of reap object is
# over indented, but no one has complained, so I'm not inclined
# to move it. However it's worth noting we're currently keeping
# stats on deletes per *replica* - which is rather obvious from
# these tests, but the result is surprising because of some
# funny logic to *skip* increments on successful deletes of
# replicas until we have more successful responses than
# failures. This means that while the first replica doesn't
# increment deleted because of the failure, the second one
# *does* get successfully deleted, but *also does not* increment
# the counter (!?).
#
# In the three replica case this leaves only the last deleted
# object incrementing the counter - in the four replica case
# this leaves the last two.
#
# Basically this test will always result in:
# deleted == num_replicas - 2
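# For example (four replicas, first delete failing, per the logic
# described above): replica 1 fails and does not increment; replica 2
# succeeds but successes (1) do not yet exceed failures (1), so it is
# skipped; replicas 3 and 4 succeed and increment - giving 4 - 2 = 2.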
self.assertEqual(r.stats_objects_deleted,
policy.object_ring.replicas - 2)
self.assertEqual(r.stats_objects_remaining, 1)
self.assertEqual(r.stats_objects_possibly_remaining, 1)
@ -347,7 +368,7 @@ class TestReaper(unittest.TestCase):
mocks['direct_get_container'].side_effect = fake_get_container
r.reap_container('a', 'partition', acc_nodes, 'c')
mock_calls = mocks['direct_delete_object'].call_args_list
self.assertEqual(3, len(mock_calls))
self.assertEqual(policy.object_ring.replicas, len(mock_calls))
for call_args in mock_calls:
_args, kwargs = call_args
self.assertEqual(kwargs['headers']
@ -355,7 +376,7 @@ class TestReaper(unittest.TestCase):
policy.idx)
self.assertEquals(mocks['direct_delete_container'].call_count, 3)
self.assertEqual(r.stats_objects_deleted, 3)
self.assertEqual(r.stats_objects_deleted, policy.object_ring.replicas)
def test_reap_container_get_object_fail(self):
r = self.init_reaper({}, fakelogger=True)
@ -373,7 +394,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)]
with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.inc['return_codes.4'], 1)
self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 1)
self.assertEqual(r.stats_containers_deleted, 1)
def test_reap_container_partial_fail(self):
@ -392,7 +413,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)]
with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.inc['return_codes.4'], 2)
self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 2)
self.assertEqual(r.stats_containers_possibly_remaining, 1)
def test_reap_container_full_fail(self):
@ -411,7 +432,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)]
with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.inc['return_codes.4'], 3)
self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 3)
self.assertEqual(r.stats_containers_remaining, 1)
@patch('swift.account.reaper.Ring',
@ -436,8 +457,8 @@ class TestReaper(unittest.TestCase):
mocks['direct_get_container'].side_effect = fake_get_container
r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.msg,
'ERROR: invalid storage policy index: 2')
self.assertEqual(r.logger.get_lines_for_level('error'), [
'ERROR: invalid storage policy index: 2'])
def fake_reap_container(self, *args, **kwargs):
self.called_amount += 1
@ -462,13 +483,16 @@ class TestReaper(unittest.TestCase):
nodes = r.get_account_ring().get_part_nodes()
self.assertTrue(r.reap_account(broker, 'partition', nodes))
self.assertEqual(self.called_amount, 4)
self.assertEqual(r.logger.msg.find('Completed pass'), 0)
self.assertTrue(r.logger.msg.find('1 containers deleted'))
self.assertTrue(r.logger.msg.find('1 objects deleted'))
self.assertTrue(r.logger.msg.find('1 containers remaining'))
self.assertTrue(r.logger.msg.find('1 objects remaining'))
self.assertTrue(r.logger.msg.find('1 containers possibly remaining'))
self.assertTrue(r.logger.msg.find('1 objects possibly remaining'))
info_lines = r.logger.get_lines_for_level('info')
self.assertEqual(len(info_lines), 2)
start_line, stat_line = info_lines
self.assertEqual(start_line, 'Beginning pass on account a')
self.assertTrue(stat_line.find('1 containers deleted'))
self.assertTrue(stat_line.find('1 objects deleted'))
self.assertTrue(stat_line.find('1 containers remaining'))
self.assertTrue(stat_line.find('1 objects remaining'))
self.assertTrue(stat_line.find('1 containers possibly remaining'))
self.assertTrue(stat_line.find('1 objects possibly remaining'))
def test_reap_account_no_container(self):
broker = FakeAccountBroker(tuple())
@ -482,7 +506,8 @@ class TestReaper(unittest.TestCase):
with nested(*ctx):
nodes = r.get_account_ring().get_part_nodes()
self.assertTrue(r.reap_account(broker, 'partition', nodes))
self.assertEqual(r.logger.msg.find('Completed pass'), 0)
self.assertTrue(r.logger.get_lines_for_level(
'info')[-1].startswith('Completed pass'))
self.assertEqual(self.called_amount, 0)
def test_reap_device(self):

View File

@ -386,6 +386,17 @@ class TestPrintObjFullMeta(TestCliInfoBase):
print_obj(self.datafile, swift_dir=self.testdir)
self.assertTrue('/objects-1/' in out.getvalue())
def test_print_obj_meta_and_ts_files(self):
# verify that print_obj will also read from meta and ts files
base = os.path.splitext(self.datafile)[0]
for ext in ('.meta', '.ts'):
test_file = '%s%s' % (base, ext)
os.link(self.datafile, test_file)
out = StringIO()
with mock.patch('sys.stdout', out):
print_obj(test_file, swift_dir=self.testdir)
self.assertTrue('/objects-1/' in out.getvalue())
def test_print_obj_no_ring(self):
no_rings_dir = os.path.join(self.testdir, 'no_rings_here')
os.mkdir(no_rings_dir)
@ -435,14 +446,14 @@ class TestPrintObjFullMeta(TestCliInfoBase):
self.assertRaisesMessage(ValueError, 'Metadata is None',
print_obj_metadata, [])
def reset_metadata():
def get_metadata(items):
md = dict(name='/AUTH_admin/c/dummy')
md['Content-Type'] = 'application/octet-stream'
md['X-Timestamp'] = 106.3
md['X-Object-Meta-Mtime'] = '107.3'
md.update(items)
return md
metadata = reset_metadata()
metadata = get_metadata({'X-Object-Meta-Mtime': '107.3'})
out = StringIO()
with mock.patch('sys.stdout', out):
print_obj_metadata(metadata)
@ -453,17 +464,93 @@ class TestPrintObjFullMeta(TestCliInfoBase):
Object hash: 128fdf98bddd1b1e8695f4340e67a67a
Content-Type: application/octet-stream
Timestamp: 1970-01-01T00:01:46.300000 (%s)
User Metadata: {'X-Object-Meta-Mtime': '107.3'}''' % (
System Metadata:
No metadata found
User Metadata:
X-Object-Meta-Mtime: 107.3
Other Metadata:
No metadata found''' % (
utils.Timestamp(106.3).internal)
self.assertEquals(out.getvalue().strip(), exp_out)
metadata = reset_metadata()
metadata = get_metadata({
'X-Object-Sysmeta-Mtime': '107.3',
'X-Object-Sysmeta-Name': 'Obj name',
})
out = StringIO()
with mock.patch('sys.stdout', out):
print_obj_metadata(metadata)
exp_out = '''Path: /AUTH_admin/c/dummy
Account: AUTH_admin
Container: c
Object: dummy
Object hash: 128fdf98bddd1b1e8695f4340e67a67a
Content-Type: application/octet-stream
Timestamp: 1970-01-01T00:01:46.300000 (%s)
System Metadata:
X-Object-Sysmeta-Mtime: 107.3
X-Object-Sysmeta-Name: Obj name
User Metadata:
No metadata found
Other Metadata:
No metadata found''' % (
utils.Timestamp(106.3).internal)
self.assertEquals(out.getvalue().strip(), exp_out)
metadata = get_metadata({
'X-Object-Meta-Mtime': '107.3',
'X-Object-Sysmeta-Mtime': '107.3',
'X-Object-Mtime': '107.3',
})
out = StringIO()
with mock.patch('sys.stdout', out):
print_obj_metadata(metadata)
exp_out = '''Path: /AUTH_admin/c/dummy
Account: AUTH_admin
Container: c
Object: dummy
Object hash: 128fdf98bddd1b1e8695f4340e67a67a
Content-Type: application/octet-stream
Timestamp: 1970-01-01T00:01:46.300000 (%s)
System Metadata:
X-Object-Sysmeta-Mtime: 107.3
User Metadata:
X-Object-Meta-Mtime: 107.3
Other Metadata:
X-Object-Mtime: 107.3''' % (
utils.Timestamp(106.3).internal)
self.assertEquals(out.getvalue().strip(), exp_out)
metadata = get_metadata({})
out = StringIO()
with mock.patch('sys.stdout', out):
print_obj_metadata(metadata)
exp_out = '''Path: /AUTH_admin/c/dummy
Account: AUTH_admin
Container: c
Object: dummy
Object hash: 128fdf98bddd1b1e8695f4340e67a67a
Content-Type: application/octet-stream
Timestamp: 1970-01-01T00:01:46.300000 (%s)
System Metadata:
No metadata found
User Metadata:
No metadata found
Other Metadata:
No metadata found''' % (
utils.Timestamp(106.3).internal)
self.assertEquals(out.getvalue().strip(), exp_out)
metadata = get_metadata({'X-Object-Meta-Mtime': '107.3'})
metadata['name'] = '/a-s'
self.assertRaisesMessage(ValueError, 'Path is invalid',
print_obj_metadata, metadata)
metadata = reset_metadata()
metadata = get_metadata({'X-Object-Meta-Mtime': '107.3'})
del metadata['name']
out = StringIO()
with mock.patch('sys.stdout', out):
@ -471,12 +558,17 @@ User Metadata: {'X-Object-Meta-Mtime': '107.3'}''' % (
exp_out = '''Path: Not found in metadata
Content-Type: application/octet-stream
Timestamp: 1970-01-01T00:01:46.300000 (%s)
User Metadata: {'X-Object-Meta-Mtime': '107.3'}''' % (
System Metadata:
No metadata found
User Metadata:
X-Object-Meta-Mtime: 107.3
Other Metadata:
No metadata found''' % (
utils.Timestamp(106.3).internal)
self.assertEquals(out.getvalue().strip(), exp_out)
metadata = reset_metadata()
metadata = get_metadata({'X-Object-Meta-Mtime': '107.3'})
del metadata['Content-Type']
out = StringIO()
with mock.patch('sys.stdout', out):
@ -488,12 +580,17 @@ User Metadata: {'X-Object-Meta-Mtime': '107.3'}''' % (
Object hash: 128fdf98bddd1b1e8695f4340e67a67a
Content-Type: Not found in metadata
Timestamp: 1970-01-01T00:01:46.300000 (%s)
User Metadata: {'X-Object-Meta-Mtime': '107.3'}''' % (
System Metadata:
No metadata found
User Metadata:
X-Object-Meta-Mtime: 107.3
Other Metadata:
No metadata found''' % (
utils.Timestamp(106.3).internal)
self.assertEquals(out.getvalue().strip(), exp_out)
metadata = reset_metadata()
metadata = get_metadata({'X-Object-Meta-Mtime': '107.3'})
del metadata['X-Timestamp']
out = StringIO()
with mock.patch('sys.stdout', out):
@ -505,6 +602,11 @@ User Metadata: {'X-Object-Meta-Mtime': '107.3'}''' % (
Object hash: 128fdf98bddd1b1e8695f4340e67a67a
Content-Type: application/octet-stream
Timestamp: Not found in metadata
User Metadata: {'X-Object-Meta-Mtime': '107.3'}'''
System Metadata:
No metadata found
User Metadata:
X-Object-Meta-Mtime: 107.3
Other Metadata:
No metadata found'''
self.assertEquals(out.getvalue().strip(), exp_out)

View File

@ -293,6 +293,43 @@ class TestRecon(unittest.TestCase):
% ex)
self.assertFalse(expected)
def test_drive_audit_check(self):
hosts = [('127.0.0.1', 6010), ('127.0.0.1', 6020),
('127.0.0.1', 6030), ('127.0.0.1', 6040)]
# sample json response from http://<host>:<port>/recon/driveaudit
responses = {6010: {'drive_audit_errors': 15},
6020: {'drive_audit_errors': 0},
6030: {'drive_audit_errors': 257},
6040: {'drive_audit_errors': 56}}
# <low> <high> <avg> <total> <Failed> <no_result> <reported>
expected = (0, 257, 82.0, 328, 0.0, 0, 4)
def mock_scout_driveaudit(app, host):
url = 'http://%s:%s/recon/driveaudit' % host
response = responses[host[1]]
status = 200
return url, response, status
stdout = StringIO()
patches = [
mock.patch('swift.cli.recon.Scout.scout', mock_scout_driveaudit),
mock.patch('sys.stdout', new=stdout),
]
with nested(*patches):
self.recon_instance.driveaudit_check(hosts)
output = stdout.getvalue()
r = re.compile("\[drive_audit_errors(.*)\](.*)")
lines = output.splitlines()
self.assertTrue(lines)
for line in lines:
m = r.match(line)
if m:
self.assertEquals(m.group(2),
" low: %s, high: %s, avg: %s, total: %s,"
" Failed: %s%%, no_result: %s, reported: %s"
% expected)
class TestReconCommands(unittest.TestCase):
def setUp(self):
@ -485,3 +522,173 @@ class TestReconCommands(unittest.TestCase):
self.assertTrue(computed)
for key in keys:
self.assertTrue(key in computed)
def test_disk_usage(self):
def dummy_request(*args, **kwargs):
return [('http://127.0.0.1:6010/recon/diskusage', [
{"device": "sdb1", "mounted": True,
"avail": 10, "used": 90, "size": 100},
{"device": "sdc1", "mounted": True,
"avail": 15, "used": 85, "size": 100},
{"device": "sdd1", "mounted": True,
"avail": 15, "used": 85, "size": 100}],
200)]
cli = recon.SwiftRecon()
cli.pool.imap = dummy_request
default_calls = [
mock.call('Distribution Graph:'),
mock.call(' 85% 2 **********************************' +
'***********************************'),
mock.call(' 90% 1 **********************************'),
mock.call('Disk usage: space used: 260 of 300'),
mock.call('Disk usage: space free: 40 of 300'),
mock.call('Disk usage: lowest: 85.0%, ' +
'highest: 90.0%, avg: 86.6666666667%'),
mock.call('=' * 79),
]
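# (the three mounted devices report 90 + 85 + 85 = 260 used out of
# 3 * 100 = 300 available, i.e. ~86.67% average utilization, matching the
# expected output above)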
with mock.patch('__builtin__.print') as mock_print:
cli.disk_usage([('127.0.0.1', 6010)])
mock_print.assert_has_calls(default_calls)
with mock.patch('__builtin__.print') as mock_print:
expected_calls = default_calls + [
mock.call('LOWEST 5'),
mock.call('85.00% 127.0.0.1 sdc1'),
mock.call('85.00% 127.0.0.1 sdd1'),
mock.call('90.00% 127.0.0.1 sdb1')
]
cli.disk_usage([('127.0.0.1', 6010)], 0, 5)
mock_print.assert_has_calls(expected_calls)
with mock.patch('__builtin__.print') as mock_print:
expected_calls = default_calls + [
mock.call('TOP 5'),
mock.call('90.00% 127.0.0.1 sdb1'),
mock.call('85.00% 127.0.0.1 sdc1'),
mock.call('85.00% 127.0.0.1 sdd1')
]
cli.disk_usage([('127.0.0.1', 6010)], 5, 0)
mock_print.assert_has_calls(expected_calls)
@mock.patch('__builtin__.print')
@mock.patch('time.time')
def test_object_replication_check(self, mock_now, mock_print):
now = 1430000000.0
def dummy_request(*args, **kwargs):
return [
('http://127.0.0.1:6010/recon/replication/object',
{"object_replication_time": 61,
"object_replication_last": now},
200),
('http://127.0.0.1:6020/recon/replication/object',
{"object_replication_time": 23,
"object_replication_last": now},
200),
]
cli = recon.SwiftRecon()
cli.pool.imap = dummy_request
default_calls = [
mock.call('[replication_time] low: 23, high: 61, avg: 42.0, ' +
'total: 84, Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('Oldest completion was 2015-04-25 22:13:20 ' +
'(42 seconds ago) by 127.0.0.1:6010.'),
mock.call('Most recent completion was 2015-04-25 22:13:20 ' +
'(42 seconds ago) by 127.0.0.1:6010.'),
]
mock_now.return_value = now + 42
cli.object_replication_check([('127.0.0.1', 6010),
('127.0.0.1', 6020)])
mock_print.assert_has_calls(default_calls)
@mock.patch('__builtin__.print')
@mock.patch('time.time')
def test_replication_check(self, mock_now, mock_print):
now = 1430000000.0
def dummy_request(*args, **kwargs):
return [
('http://127.0.0.1:6011/recon/replication/container',
{"replication_last": now,
"replication_stats": {
"no_change": 2, "rsync": 0, "success": 3, "failure": 1,
"attempted": 0, "ts_repl": 0, "remove": 0,
"remote_merge": 0, "diff_capped": 0, "start": now,
"hashmatch": 0, "diff": 0, "empty": 0},
"replication_time": 42},
200),
('http://127.0.0.1:6021/recon/replication/container',
{"replication_last": now,
"replication_stats": {
"no_change": 0, "rsync": 0, "success": 1, "failure": 0,
"attempted": 0, "ts_repl": 0, "remove": 0,
"remote_merge": 0, "diff_capped": 0, "start": now,
"hashmatch": 0, "diff": 0, "empty": 0},
"replication_time": 23},
200),
]
cli = recon.SwiftRecon()
cli.pool.imap = dummy_request
default_calls = [
mock.call('[replication_failure] low: 0, high: 1, avg: 0.5, ' +
'total: 1, Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('[replication_success] low: 1, high: 3, avg: 2.0, ' +
'total: 4, Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('[replication_time] low: 23, high: 42, avg: 32.5, ' +
'total: 65, Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('[replication_attempted] low: 0, high: 0, avg: 0.0, ' +
'total: 0, Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('Oldest completion was 2015-04-25 22:13:20 ' +
'(42 seconds ago) by 127.0.0.1:6011.'),
mock.call('Most recent completion was 2015-04-25 22:13:20 ' +
'(42 seconds ago) by 127.0.0.1:6011.'),
]
mock_now.return_value = now + 42
cli.replication_check([('127.0.0.1', 6011), ('127.0.0.1', 6021)])
# We need any_order=True because the order of calls depends on the dict
# returned from the recon middleware, so we can't rely on it
mock_print.assert_has_calls(default_calls, any_order=True)
@mock.patch('__builtin__.print')
@mock.patch('time.time')
def test_load_check(self, mock_now, mock_print):
now = 1430000000.0
def dummy_request(*args, **kwargs):
return [
('http://127.0.0.1:6010/recon/load',
{"1m": 0.2, "5m": 0.4, "15m": 0.25,
"processes": 10000, "tasks": "1/128"},
200),
('http://127.0.0.1:6020/recon/load',
{"1m": 0.4, "5m": 0.8, "15m": 0.75,
"processes": 9000, "tasks": "1/200"},
200),
]
cli = recon.SwiftRecon()
cli.pool.imap = dummy_request
default_calls = [
mock.call('[5m_load_avg] low: 0, high: 0, avg: 0.6, total: 1, ' +
'Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('[15m_load_avg] low: 0, high: 0, avg: 0.5, total: 1, ' +
'Failed: 0.0%, no_result: 0, reported: 2'),
mock.call('[1m_load_avg] low: 0, high: 0, avg: 0.3, total: 0, ' +
'Failed: 0.0%, no_result: 0, reported: 2'),
]
mock_now.return_value = now + 42
cli.load_check([('127.0.0.1', 6010), ('127.0.0.1', 6020)])
# We need any_order=True because the order of calls depends on the dict
# returned from the recon middleware, so we can't rely on it
mock_print.assert_has_calls(default_calls, any_order=True)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import mock
import os
import StringIO
@ -1710,10 +1711,43 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin):
ring.devs[0]['weight'] = 10
ring.save(self.tmpfile)
argv = ["", self.tmpfile, "rebalance"]
err = None
try:
ringbuilder.main(argv)
except SystemExit as e:
self.assertEquals(e.code, 1)
err = e
self.assertEquals(err.code, 1)
def test_invalid_device_name(self):
self.create_sample_ring()
for device_name in ["", " ", " sda1", "sda1 ", " meta "]:
err = 0
argv = ["",
self.tmpfile,
"add",
"r1z1-127.0.0.1:6000/%s" % device_name,
"1"]
try:
ringbuilder.main(argv)
except SystemExit as exc:
err = exc
self.assertEquals(err.code, 2)
argv = ["",
self.tmpfile,
"add",
"--region", "1",
"--zone", "1",
"--ip", "127.0.0.1",
"--port", "6000",
"--device", device_name,
"--weight", "100"]
try:
ringbuilder.main(argv)
except SystemExit as exc:
err = exc
self.assertEquals(err.code, 2)
class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
@ -1744,6 +1778,32 @@ class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
raise
return (mock_stdout.getvalue(), mock_stderr.getvalue())
def test_debug(self):
# NB: getLogger(name) always returns the same object
rb_logger = logging.getLogger("swift.ring.builder")
try:
self.assertNotEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
self.run_srb("create", 8, 3, 1)
self.run_srb("add",
"r1z1-10.1.1.1:2345/sda", 100.0,
"r1z1-10.1.1.1:2345/sdb", 100.0,
"r1z1-10.1.1.1:2345/sdc", 100.0,
"r1z1-10.1.1.1:2345/sdd", 100.0)
self.run_srb("rebalance", "--debug")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
rb_logger.setLevel(logging.INFO)
self.run_srb("rebalance", "--debug", "123")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
rb_logger.setLevel(logging.INFO)
self.run_srb("rebalance", "123", "--debug")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
finally:
rb_logger.setLevel(logging.INFO) # silence other test cases
def test_rebalance_warning_appears(self):
self.run_srb("create", 8, 3, 24)
# all in one machine: totally balanceable

View File

@ -564,9 +564,10 @@ class TestDloGetManifest(DloTestCase):
environ={'REQUEST_METHOD': 'GET'})
status, headers, body = self.call_dlo(req)
self.assertEqual(status, "409 Conflict")
err_log = self.dlo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
err_lines = self.dlo.logger.get_lines_for_level('error')
self.assertEqual(len(err_lines), 1)
self.assertTrue(err_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
def test_error_fetching_second_segment(self):
self.app.register(
@ -581,9 +582,10 @@ class TestDloGetManifest(DloTestCase):
self.assertTrue(isinstance(exc, exceptions.SegmentError))
self.assertEqual(status, "200 OK")
self.assertEqual(''.join(body), "aaaaa") # first segment made it out
err_log = self.dlo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
err_lines = self.dlo.logger.get_lines_for_level('error')
self.assertEqual(len(err_lines), 1)
self.assertTrue(err_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
def test_error_listing_container_first_listing_request(self):
self.app.register(

View File

@ -158,6 +158,31 @@ class SwiftAuth(unittest.TestCase):
resp = req.get_response(self.test_auth)
self.assertEqual(resp.status_int, 401)
def test_denied_responses(self):
def get_resp_status(headers):
req = self._make_request(headers=headers)
resp = req.get_response(self.test_auth)
return resp.status_int
self.assertEqual(get_resp_status({'X_IDENTITY_STATUS': 'Confirmed'}),
403)
self.assertEqual(get_resp_status(
{'X_IDENTITY_STATUS': 'Confirmed',
'X_SERVICE_IDENTITY_STATUS': 'Confirmed'}), 403)
self.assertEqual(get_resp_status({}), 401)
self.assertEqual(get_resp_status(
{'X_IDENTITY_STATUS': 'Invalid'}), 401)
self.assertEqual(get_resp_status(
{'X_IDENTITY_STATUS': 'Invalid',
'X_SERVICE_IDENTITY_STATUS': 'Confirmed'}), 401)
self.assertEqual(get_resp_status(
{'X_IDENTITY_STATUS': 'Confirmed',
'X_SERVICE_IDENTITY_STATUS': 'Invalid'}), 401)
self.assertEqual(get_resp_status(
{'X_IDENTITY_STATUS': 'Invalid',
'X_SERVICE_IDENTITY_STATUS': 'Invalid'}), 401)
def test_blank_reseller_prefix(self):
conf = {'reseller_prefix': ''}
test_auth = keystoneauth.filter_factory(conf)(FakeApp())
@ -854,6 +879,25 @@ class TestAuthorize(BaseTestAuthorize):
acl = '%s:%s' % (id['HTTP_X_TENANT_ID'], id['HTTP_X_USER_ID'])
self._check_authenticate(acl=acl, identity=id, env=env)
def test_keystone_identity(self):
user_name = 'U_NAME'
project = ('P_ID', 'P_NAME')
roles = ('ROLE1', 'ROLE2')
req = Request.blank('/v/a/c/o')
req.headers.update({'X-Identity-Status': 'Confirmed',
'X-Roles': ' %s , %s ' % roles,
'X-User-Name': user_name,
'X-Tenant-Id': project[0],
'X-Tenant-Name': project[1]})
expected = {'user': user_name,
'tenant': project,
'roles': list(roles)}
data = self.test_auth._keystone_identity(req.environ)
self.assertEquals(expected, data)
def test_integral_keystone_identity(self):
user = ('U_ID', 'U_NAME')
roles = ('ROLE1', 'ROLE2')

View File

@ -172,6 +172,9 @@ class FakeRecon(object):
def fake_sockstat(self):
return {'sockstattest': "1"}
def fake_driveaudit(self):
return {'driveaudittest': "1"}
def nocontent(self):
return None
@ -489,6 +492,9 @@ class TestReconSuccess(TestCase):
from_cache_response = {'async_pending': 5}
self.fakecache.fakeout = from_cache_response
rv = self.app.get_async_info()
self.assertEquals(self.fakecache.fakeout_calls,
[((['async_pending'],
'/var/cache/swift/object.recon'), {})])
self.assertEquals(rv, {'async_pending': 5})
def test_get_replication_info_account(self):
@ -585,6 +591,17 @@ class TestReconSuccess(TestCase):
'/var/cache/swift/object.recon'), {})])
self.assertEquals(rv, {"object_updater_sweep": 0.79848217964172363})
def test_get_expirer_info_object(self):
from_cache_response = {'object_expiration_pass': 0.79848217964172363,
'expired_last_pass': 99}
self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_expirer_info('object')
self.assertEquals(self.fakecache.fakeout_calls,
[((['object_expiration_pass', 'expired_last_pass'],
'/var/cache/swift/object.recon'), {})])
self.assertEquals(rv, from_cache_response)
def test_get_auditor_info_account(self):
from_cache_response = {"account_auditor_pass_completed": 0.24,
"account_audits_failed": 0,
@ -829,6 +846,15 @@ class TestReconSuccess(TestCase):
(('/proc/net/sockstat', 'r'), {}),
(('/proc/net/sockstat6', 'r'), {})])
def test_get_driveaudit_info(self):
from_cache_response = {'drive_audit_errors': 7}
self.fakecache.fakeout = from_cache_response
rv = self.app.get_driveaudit_error()
self.assertEquals(self.fakecache.fakeout_calls,
[((['drive_audit_errors'],
'/var/cache/swift/drive.recon'), {})])
self.assertEquals(rv, {'drive_audit_errors': 7})
class TestReconMiddleware(unittest.TestCase):
@ -857,6 +883,7 @@ class TestReconMiddleware(unittest.TestCase):
self.app.get_swift_conf_md5 = self.frecon.fake_swiftconfmd5
self.app.get_quarantine_count = self.frecon.fake_quarantined
self.app.get_socket_info = self.frecon.fake_sockstat
self.app.get_driveaudit_error = self.frecon.fake_driveaudit
def test_recon_get_mem(self):
get_mem_resp = ['{"memtest": "1"}']
@ -1084,5 +1111,12 @@ class TestReconMiddleware(unittest.TestCase):
resp = self.app(req.environ, start_response)
self.assertEquals(resp, 'FAKE APP')
def test_recon_get_driveaudit(self):
get_driveaudit_resp = ['{"driveaudittest": "1"}']
req = Request.blank('/recon/driveaudit',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_driveaudit_resp)
if __name__ == '__main__':
unittest.main()

View File

@ -1431,9 +1431,10 @@ class TestSloGetManifest(SloTestCase):
self.assertEqual(status, '409 Conflict')
self.assertEqual(self.app.call_count, 10)
err_log = self.slo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
error_lines = self.slo.logger.get_lines_for_level('error')
self.assertEqual(len(error_lines), 1)
self.assertTrue(error_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
def test_get_with_if_modified_since(self):
# It's important not to pass the If-[Un]Modified-Since header to the
@ -1508,9 +1509,10 @@ class TestSloGetManifest(SloTestCase):
status, headers, body = self.call_slo(req)
self.assertEqual('409 Conflict', status)
err_log = self.slo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
error_lines = self.slo.logger.get_lines_for_level('error')
self.assertEqual(len(error_lines), 1)
self.assertTrue(error_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
def test_invalid_json_submanifest(self):
self.app.register(
@ -1585,9 +1587,10 @@ class TestSloGetManifest(SloTestCase):
status, headers, body = self.call_slo(req)
self.assertEqual('409 Conflict', status)
err_log = self.slo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
error_lines = self.slo.logger.get_lines_for_level('error')
self.assertEqual(len(error_lines), 1)
self.assertTrue(error_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
def test_first_segment_mismatched_size(self):
self.app.register('GET', '/v1/AUTH_test/gettest/manifest-badsize',
@ -1603,9 +1606,10 @@ class TestSloGetManifest(SloTestCase):
status, headers, body = self.call_slo(req)
self.assertEqual('409 Conflict', status)
err_log = self.slo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
error_lines = self.slo.logger.get_lines_for_level('error')
self.assertEqual(len(error_lines), 1)
self.assertTrue(error_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
def test_download_takes_too_long(self):
the_time = [time.time()]
@ -1657,9 +1661,10 @@ class TestSloGetManifest(SloTestCase):
status, headers, body = self.call_slo(req)
self.assertEqual('409 Conflict', status)
err_log = self.slo.logger.log_dict['exception'][0][0][0]
self.assertTrue(err_log.startswith('ERROR: An error occurred '
'while retrieving segments'))
error_lines = self.slo.logger.get_lines_for_level('error')
self.assertEqual(len(error_lines), 1)
self.assertTrue(error_lines[0].startswith(
'ERROR: An error occurred while retrieving segments'))
class TestSloBulkLogger(unittest.TestCase):

View File

@ -14,9 +14,10 @@
# limitations under the License.
import unittest
from contextlib import contextmanager
from contextlib import contextmanager, nested
from base64 import b64encode
from time import time
import mock
from swift.common.middleware import tempauth as auth
from swift.common.middleware.acl import format_acl
@ -266,6 +267,25 @@ class TestAuth(unittest.TestCase):
self.assertEquals(req.environ['swift.authorize'],
local_auth.denied_response)
def test_auth_with_s3_authorization(self):
local_app = FakeApp()
local_auth = auth.filter_factory(
{'user_s3_s3': 's3 .admin'})(local_app)
req = self._make_request('/v1/AUTH_s3',
headers={'X-Auth-Token': 't',
'AUTHORIZATION': 'AWS s3:s3:pass'})
with nested(mock.patch('base64.urlsafe_b64decode'),
mock.patch('base64.encodestring')) as (msg, sign):
msg.return_value = ''
sign.return_value = 'pass'
resp = req.get_response(local_auth)
self.assertEquals(resp.status_int, 404)
self.assertEquals(local_app.calls, 1)
self.assertEquals(req.environ['swift.authorize'],
local_auth.authorize)
def test_auth_no_reseller_prefix_no_token(self):
# Check that normally we set up a call back to our authorize.
local_auth = auth.filter_factory({'reseller_prefix': ''})(FakeApp())

View File

@ -363,63 +363,74 @@ class TestRing(TestRingBase):
self.assertRaises(TypeError, self.ring.get_nodes)
part, nodes = self.ring.get_nodes('a')
self.assertEquals(part, 0)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a1')
self.assertEquals(part, 0)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a4')
self.assertEquals(part, 1)
self.assertEquals(nodes, [self.intended_devs[1],
self.intended_devs[4]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[1],
self.intended_devs[4]])])
part, nodes = self.ring.get_nodes('aa')
self.assertEquals(part, 1)
self.assertEquals(nodes, [self.intended_devs[1],
self.intended_devs[4]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[1],
self.intended_devs[4]])])
part, nodes = self.ring.get_nodes('a', 'c1')
self.assertEquals(part, 0)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a', 'c0')
self.assertEquals(part, 3)
self.assertEquals(nodes, [self.intended_devs[1],
self.intended_devs[4]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[1],
self.intended_devs[4]])])
part, nodes = self.ring.get_nodes('a', 'c3')
self.assertEquals(part, 2)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a', 'c2')
self.assertEquals(part, 2)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a', 'c', 'o1')
self.assertEquals(part, 1)
self.assertEquals(nodes, [self.intended_devs[1],
self.intended_devs[4]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[1],
self.intended_devs[4]])])
part, nodes = self.ring.get_nodes('a', 'c', 'o5')
self.assertEquals(part, 0)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a', 'c', 'o0')
self.assertEquals(part, 0)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
part, nodes = self.ring.get_nodes('a', 'c', 'o2')
self.assertEquals(part, 2)
self.assertEquals(nodes, [self.intended_devs[0],
self.intended_devs[3]])
self.assertEquals(nodes, [dict(node, index=i) for i, node in
enumerate([self.intended_devs[0],
self.intended_devs[3]])])
def add_dev_to_ring(self, new_dev):
self.ring.devs.append(new_dev)

View File

@ -368,6 +368,11 @@ class TestConstraints(unittest.TestCase):
self.assertTrue('X-Delete-At' in req.headers)
self.assertEqual(req.headers['X-Delete-At'], expected)
def test_check_dir(self):
self.assertFalse(constraints.check_dir('', ''))
with mock.patch("os.path.isdir", MockTrue()):
self.assertTrue(constraints.check_dir('/srv', 'foo/bar'))
def test_check_mount(self):
self.assertFalse(constraints.check_mount('', ''))
with mock.patch("swift.common.utils.ismount", MockTrue()):

Some files were not shown because too many files have changed in this diff Show More