sahara/sahara/plugins/vanilla/v1_2_1/resources/hdfs-default.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Do not modify this file directly.  Instead, copy entries that you -->
<!-- wish to modify from this file into hdfs-site.xml and change them -->
<!-- there.  If hdfs-site.xml does not already exist, create it.      -->

<configuration>

<property>
  <name>dfs.namenode.logging.level</name>
  <value>info</value>
  <description>The logging level for dfs namenode. Other values are "dir"(trac
e namespace mutations), "block"(trace block under/over replications and block
creations/deletions), or "all".</description>
</property>

<property>
  <name>dfs.namenode.rpc-address</name>
  <value></value>
  <description>
    RPC address that handles all clients requests. If empty then we'll get the
    value from fs.default.name.
    The value of this property will take the form of hdfs://nn-host1:rpc-port.
  </description>
</property>

<property>
  <name>dfs.secondary.http.address</name>
  <value>0.0.0.0:50090</value>
  <description>
    The secondary namenode http server address and port.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.datanode.address</name>
  <value>0.0.0.0:50010</value>
  <description>
    The datanode server address and port for data transfer.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.datanode.http.address</name>
  <value>0.0.0.0:50075</value>
  <description>
    The datanode http server address and port.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.datanode.ipc.address</name>
  <value>0.0.0.0:50020</value>
  <description>
    The datanode ipc server address and port.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.datanode.handler.count</name>
  <value>3</value>
  <description>The number of server threads for the datanode.</description>
</property>

<property>
  <name>dfs.http.address</name>
  <value>0.0.0.0:50070</value>
  <description>
    The address and the base port where the dfs namenode web ui will listen on.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.https.enable</name>
  <value>false</value>
  <description>Decide if HTTPS(SSL) is supported on HDFS
  </description>
</property>

<property>
  <name>dfs.https.need.client.auth</name>
  <value>false</value>
  <description>Whether SSL client certificate authentication is required
  </description>
</property>

<property>
  <name>dfs.https.server.keystore.resource</name>
  <value>ssl-server.xml</value>
  <description>Resource file from which ssl server keystore
  information will be extracted
  </description>
</property>

<property>
  <name>dfs.https.client.keystore.resource</name>
  <value>ssl-client.xml</value>
  <description>Resource file from which ssl client keystore
  information will be extracted
  </description>
</property>

<property>
  <name>dfs.datanode.https.address</name>
  <value>0.0.0.0:50475</value>
</property>

<property>
  <name>dfs.https.address</name>
  <value>0.0.0.0:50470</value>
</property>

 <property>
  <name>dfs.datanode.dns.interface</name>
  <value>default</value>
  <description>The name of the Network Interface from which a data node should
  report its IP address.
  </description>
 </property>

<property>
  <name>dfs.datanode.dns.nameserver</name>
  <value>default</value>
  <description>The host name or IP address of the name server (DNS)
  which a DataNode should use to determine the host name used by the
  NameNode for communication and display purposes.
  </description>
 </property>


<property>
  <name>dfs.replication.considerLoad</name>
  <value>true</value>
  <description>Decide if chooseTarget considers the target's load or not
  </description>
</property>
<property>
  <name>dfs.default.chunk.view.size</name>
  <value>32768</value>
  <description>The number of bytes to view for a file on the browser.
  </description>
</property>

<property>
  <name>dfs.datanode.du.reserved</name>
  <value>0</value>
  <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
  </description>
</property>

<property>
  <name>dfs.name.dir</name>
  <value>${hadoop.tmp.dir}/dfs/name</value>
  <description>Determines where on the local filesystem the DFS name node
      should store the name table(fsimage).  If this is a comma-delimited list
      of directories then the name table is replicated in all of the
      directories, for redundancy. </description>
</property>

<property>
  <name>dfs.name.edits.dir</name>
  <value>${dfs.name.dir}</value>
  <description>Determines where on the local filesystem the DFS name node
      should store the transaction (edits) file. If this is a comma-delimited list
      of directories then the transaction file is replicated in all of the
      directories, for redundancy. Default value is same as dfs.name.dir
  </description>
</property>

<property>
  <name>dfs.namenode.edits.toleration.length</name>
  <value>0</value>
  <description>
    The length in bytes that namenode is willing to tolerate when the edit log
    is corrupted.  The edit log toleration feature checks the entire edit log.
    It computes read length (the length of valid data), corruption length and
    padding length.  In case that corruption length is non-zero, the corruption
    will be tolerated only if the corruption length is less than or equal to
    the toleration length.

    For disabling edit log toleration feature, set this property to -1.  When
    the feature is disabled, the end of edit log will not be checked.  In this
    case, namenode will startup normally even if the end of edit log is
    corrupted.
  </description>
</property>

<property>
  <name>dfs.web.ugi</name>
  <value>webuser,webgroup</value>
  <description>The user account used by the web interface.
    Syntax: USERNAME,GROUP1,GROUP2, ...
  </description>
</property>

<property>
  <name>dfs.permissions</name>
  <value>true</value>
  <description>
    If "true", enable permission checking in HDFS.
    If "false", permission checking is turned off,
    but all other behavior is unchanged.
    Switching from one parameter value to the other does not change the mode,
    owner or group of files or directories.
  </description>
</property>

<property>
  <name>dfs.permissions.supergroup</name>
  <value>supergroup</value>
  <description>The name of the group of super-users.</description>
</property>

<property>
  <name>dfs.block.access.token.enable</name>
  <value>false</value>
  <description>
    If "true", access tokens are used as capabilities for accessing datanodes.
    If "false", no access tokens are checked on accessing datanodes.
  </description>
</property>

<property>
  <name>dfs.block.access.key.update.interval</name>
  <value>600</value>
  <description>
    Interval in minutes at which namenode updates its access keys.
  </description>
</property>

<property>
  <name>dfs.block.access.token.lifetime</name>
  <value>600</value>
  <description>The lifetime of access tokens in minutes.</description>
</property>


<property>
  <name>dfs.data.dir</name>
  <value>${hadoop.tmp.dir}/dfs/data</value>
  <description>Determines where on the local filesystem an DFS data node
  should store its blocks.  If this is a comma-delimited
  list of directories, then data will be stored in all named
  directories, typically on different devices.
  Directories that do not exist are ignored.
  </description>
</property>

<property>
  <name>dfs.datanode.data.dir.perm</name>
  <value>755</value>
  <description>Permissions for the directories on on the local filesystem where
  the DFS data node store its blocks. The permissions can either be octal or
  symbolic.</description>
</property>

<property>
  <name>dfs.replication</name>
  <value>3</value>
  <description>Default block replication.
  The actual number of replications can be specified when the file is created.
  The default is used if replication is not specified in create time.
  </description>
</property>

<property>
  <name>dfs.replication.max</name>
  <value>512</value>
  <description>Maximal block replication.
  </description>
</property>

<property>
  <name>dfs.replication.min</name>
  <value>1</value>
  <description>Minimal block replication.
  </description>
</property>

<property>
  <name>dfs.block.size</name>
  <value>67108864</value>
  <description>The default block size for new files.</description>
</property>

<property>
  <name>dfs.df.interval</name>
  <value>60000</value>
  <description>Disk usage statistics refresh interval in msec.</description>
</property>

<property>
  <name>dfs.client.block.write.retries</name>
  <value>3</value>
  <description>The number of retries for writing blocks to the data nodes,
  before we signal failure to the application.
  </description>
</property>

<property>
  <name>dfs.blockreport.intervalMsec</name>
  <value>3600000</value>
  <description>Determines block reporting interval in milliseconds.</description>
</property>

<property>
  <name>dfs.blockreport.initialDelay</name>  <value>0</value>
  <description>Delay for first block report in seconds.</description>
</property>

<property>
  <name>dfs.heartbeat.interval</name>
  <value>3</value>
  <description>Determines datanode heartbeat interval in seconds.</description>
</property>

<property>
  <name>dfs.namenode.handler.count</name>
  <value>10</value>
  <description>The number of server threads for the namenode.</description>
</property>

<property>
  <name>dfs.safemode.threshold.pct</name>
  <value>0.999f</value>
  <description>
    Specifies the percentage of blocks that should satisfy
    the minimal replication requirement defined by dfs.replication.min.
    Values less than or equal to 0 mean not to wait for any particular
    percentage of blocks before exiting safemode.
    Values greater than 1 will make safe mode permanent.
  </description>
 </property>

<property>
  <name>dfs.namenode.safemode.min.datanodes</name>
  <value>0</value>
  <description>
    Specifies the number of datanodes that must be considered alive
    before the name node exits safemode.
    Values less than or equal to 0 mean not to take the number of live
    datanodes into account when deciding whether to remain in safe mode
    during startup.
    Values greater than the number of datanodes in the cluster
    will make safe mode permanent.
  </description>
</property>

<property>
  <name>dfs.safemode.extension</name>
  <value>30000</value>
  <description>
    Determines extension of safe mode in milliseconds
    after the threshold level is reached.
  </description>
</property>

<property>
  <name>dfs.balance.bandwidthPerSec</name>
  <value>1048576</value>
  <description>
        Specifies the maximum amount of bandwidth that each datanode
        can utilize for the balancing purpose in term of
        the number of bytes per second.
  </description>
</property>

<property>
  <name>dfs.hosts</name>
  <value></value>
  <description>Names a file that contains a list of hosts that are
  permitted to connect to the namenode. The full pathname of the file
  must be specified.  If the value is empty, all hosts are
  permitted.</description>
</property>

<property>
  <name>dfs.hosts.exclude</name>
  <value></value>
  <description>Names a file that contains a list of hosts that are
  not permitted to connect to the namenode.  The full pathname of the
  file must be specified.  If the value is empty, no hosts are
  excluded.</description>
</property>

<property>
  <name>dfs.max.objects</name>
  <value>0</value>
  <description>The maximum number of files, directories and blocks
  dfs supports. A value of zero indicates no limit to the number
  of objects that dfs supports.
  </description>
</property>

<property>
  <name>dfs.namenode.decommission.interval</name>
  <value>30</value>
  <description>Namenode periodicity in seconds to check if decommission is
  complete.</description>
</property>

<property>
  <name>dfs.namenode.decommission.nodes.per.interval</name>
  <value>5</value>
  <description>The number of nodes namenode checks if decommission is complete
  in each dfs.namenode.decommission.interval.</description>
</property>

<property>
  <name>dfs.replication.interval</name>
  <value>3</value>
  <description>The periodicity in seconds with which the namenode computes
  repliaction work for datanodes. </description>
</property>

<property>
  <name>dfs.access.time.precision</name>
  <value>3600000</value>
  <description>The access time for HDFS file is precise upto this value.
               The default value is 1 hour. Setting a value of 0 disables
               access times for HDFS.
  </description>
</property>

<property>
  <name>dfs.support.append</name>
  <description>
    This option is no longer supported. HBase no longer requires that
    this option be enabled as sync is now enabled by default. See
    HADOOP-8230 for additional information.
  </description>
</property>

<property>
  <name>dfs.namenode.delegation.key.update-interval</name>
  <value>86400000</value>
  <description>The update interval for master key for delegation tokens
       in the namenode in milliseconds.
  </description>
</property>

<property>
  <name>dfs.namenode.delegation.token.max-lifetime</name>
  <value>604800000</value>
  <description>The maximum lifetime in milliseconds for which a delegation
      token is valid.
  </description>
</property>

<property>
  <name>dfs.namenode.delegation.token.renew-interval</name>
  <value>86400000</value>
  <description>The renewal interval for delegation token in milliseconds.
  </description>
</property>

<property>
  <name>dfs.datanode.failed.volumes.tolerated</name>
  <value>0</value>
  <description>The number of volumes that are allowed to
  fail before a datanode stops offering service. By default
  any volume failure will cause a datanode to shutdown.
  </description>
</property>

<property>
  <name>dfs.datanode.max.xcievers</name>
  <value>4096</value>
  <description>Specifies the maximum number of threads to use for transferring data
  in and out of the DN.
  </description>
</property>

<property>
  <name>dfs.datanode.readahead.bytes</name>
  <value>4193404</value>
  <description>
        While reading block files, if the Hadoop native libraries are available,
        the datanode can use the posix_fadvise system call to explicitly
        page data into the operating system buffer cache ahead of the current
        reader's position. This can improve performance especially when
        disks are highly contended.

        This configuration specifies the number of bytes ahead of the current
        read position which the datanode will attempt to read ahead. This
        feature may be disabled by configuring this property to 0.

        If the native libraries are not available, this configuration has no
        effect.
  </description>
</property>

<property>
  <name>dfs.datanode.drop.cache.behind.reads</name>
  <value>false</value>
  <description>
        In some workloads, the data read from HDFS is known to be significantly
        large enough that it is unlikely to be useful to cache it in the
        operating system buffer cache. In this case, the DataNode may be
        configured to automatically purge all data from the buffer cache
        after it is delivered to the client. This behavior is automatically
        disabled for workloads which read only short sections of a block
        (e.g HBase random-IO workloads).

        This may improve performance for some workloads by freeing buffer
        cache spage usage for more cacheable data.

        If the Hadoop native libraries are not available, this configuration
        has no effect.
  </description>
</property>

<property>
  <name>dfs.datanode.drop.cache.behind.writes</name>
  <value>false</value>
  <description>
        In some workloads, the data written to HDFS is known to be significantly
        large enough that it is unlikely to be useful to cache it in the
        operating system buffer cache. In this case, the DataNode may be
        configured to automatically purge all data from the buffer cache
        after it is written to disk.

        This may improve performance for some workloads by freeing buffer
        cache spage usage for more cacheable data.

        If the Hadoop native libraries are not available, this configuration
        has no effect.
  </description>
</property>

<property>
  <name>dfs.datanode.sync.behind.writes</name>
  <value>false</value>
  <description>
        If this configuration is enabled, the datanode will instruct the
        operating system to enqueue all written data to the disk immediately
        after it is written. This differs from the usual OS policy which
        may wait for up to 30 seconds before triggering writeback.

        This may improve performance for some workloads by smoothing the
        IO profile for data written to disk.

        If the Hadoop native libraries are not available, this configuration
        has no effect.
  </description>
</property>

<property>
  <name>dfs.client.use.datanode.hostname</name>
  <value>false</value>
  <description>Whether clients should use datanode hostnames when
    connecting to datanodes.
  </description>
</property>

<property>
  <name>dfs.datanode.use.datanode.hostname</name>
  <value>false</value>
  <description>Whether datanodes should use datanode hostnames when
    connecting to other datanodes for data transfer.
  </description>
</property>

<property>
  <name>dfs.client.local.interfaces</name>
  <value></value>
  <description>A comma separated list of network interface names to use
    for data transfer between the client and datanodes. When creating
    a connection to read from or write to a datanode, the client
    chooses one of the specified interfaces at random and binds its
    socket to the IP of that interface. Individual names may be
    specified as either an interface name (eg "eth0"), a subinterface
    name (eg "eth0:0"), or an IP address (which may be specified using
    CIDR notation to match a range of IPs).
  </description>
</property>

<property>
  <name>dfs.image.transfer.bandwidthPerSec</name>
  <value>0</value>
  <description>
    Specifies the maximum amount of bandwidth that can be utilized
    for image transfer in term of the number of bytes per second.
    A default value of 0 indicates that throttling is disabled.
  </description>
</property>

<property>
  <name>dfs.webhdfs.enabled</name>
  <value>false</value>
  <description>
    Enable WebHDFS (REST API) in Namenodes and Datanodes.
  </description>
</property>

<property>
  <name>dfs.namenode.kerberos.internal.spnego.principal</name>
  <value>${dfs.web.authentication.kerberos.principal}</value>
</property>

<property>
  <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
  <value>${dfs.web.authentication.kerberos.principal}</value>
</property>

<property>
  <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
  <value>0.32f</value>
  <description>
    *Note*: Advanced property. Change with caution.
    This determines the percentage amount of block
    invalidations (deletes) to do over a single DN heartbeat
    deletion command. The final deletion count is determined by applying this
    percentage to the number of live nodes in the system.
    The resultant number is the number of blocks from the deletion list
    chosen for proper invalidation over a single heartbeat of a single DN.
    Value should be a positive, non-zero percentage in float notation (X.Yf),
    with 1.0f meaning 100%.
  </description>
</property>

<property>
  <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
  <value>2</value>
  <description>
    *Note*: Advanced property. Change with caution.
    This determines the total amount of block transfers to begin in
    parallel at a DN, for replication, when such a command list is being
    sent over a DN heartbeat by the NN. The actual number is obtained by
    multiplying this multiplier with the total number of live nodes in the
    cluster. The result number is the number of blocks to begin transfers
    immediately for, per DN heartbeat. This number can be any positive,
    non-zero integer.
  </description>
</property>

<property>
  <name>dfs.namenode.avoid.read.stale.datanode</name>
  <value>false</value>
  <description>
    Indicate whether or not to avoid reading from &quot;stale&quot; datanodes whose
    heartbeat messages have not been received by the namenode
    for more than a specified time interval. Stale datanodes will be
    moved to the end of the node list returned for reading. See
    dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.
  </description>
</property>

<property>
  <name>dfs.namenode.avoid.write.stale.datanode</name>
  <value>false</value>
  <description>
    Indicate whether or not to avoid writing to &quot;stale&quot; datanodes whose
    heartbeat messages have not been received by the namenode
    for more than a specified time interval. Writes will avoid using
    stale datanodes unless more than a configured ratio
    (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as
    stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting
    for reads.
  </description>
</property>

<property>
  <name>dfs.namenode.stale.datanode.interval</name>
  <value>30000</value>
  <description>
    Default time interval for marking a datanode as "stale", i.e., if
    the namenode has not received heartbeat msg from a datanode for
    more than this time interval, the datanode will be marked and treated
    as "stale" by default. The stale interval cannot be too small since
    otherwise this may cause too frequent change of stale states.
    We thus set a minimum stale interval value (the default value is 3 times
    of heartbeat interval) and guarantee that the stale interval cannot be less
    than the minimum value.
  </description>
</property>

<property>
  <name>dfs.namenode.write.stale.datanode.ratio</name>
  <value>0.5f</value>
  <description>
    When the ratio of number stale datanodes to total datanodes marked
    is greater than this ratio, stop avoiding writing to stale nodes so
   as to prevent causing hotspots.
  </description>
</property>

<property>
  <name>dfs.datanode.plugins</name>
  <value></value>
  <description>Comma-separated list of datanode plug-ins to be activated.
  </description>
</property>

<property>
  <name>dfs.namenode.plugins</name>
  <value></value>
  <description>Comma-separated list of namenode plug-ins to be activated.
  </description>
</property>

</configuration>