Remove edp-examples
The EDP examples will be moved to the sahara repo and removed from
sahara-extra.

Partial-implements: blueprint edp-move-examples
Change-Id: Iff684b2bb688c44ab7f9f49fcb326dfdba787284
This commit is contained in:
parent
21e037c120
commit
6f67ebb9cb

@@ -1,5 +1,4 @@
 EDP Examples
 ============

-* Pig job example - trim spaces in input file
-* Edp wordcount - a version of WordCount that works with swift input and output
+The EDP examples have been moved to the **sahara** repo under *sahara/edp-examples*

@@ -1,81 +0,0 @@
=====================
EDP WordCount Example
=====================

Overview
========

``WordCount.java`` is a modified version of the WordCount example bundled with
version 1.2.1 of Apache Hadoop. It has been extended for use from a Java action
in an Oozie workflow. The modification below allows any configuration values
from the ``<configuration>`` tag in an Oozie workflow to be set in the
``Configuration`` object::

    // This will add properties from the <configuration> tag specified
    // in the Oozie workflow. For Java actions, Oozie writes the
    // configuration values to a file pointed to by oozie.action.conf.xml
    conf.addResource(new Path("file:///",
                              System.getProperty("oozie.action.conf.xml")));

In the example workflow, we use the ``<configuration>`` tag to specify user and
password configuration values for accessing swift objects.
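
For reference, the relevant ``<configuration>`` block from the example
``workflow.xml`` (included in full later in this commit) looks like this;
``swiftuser`` and ``swiftpassword`` are placeholders::

    <configuration>
        <property>
            <name>fs.swift.service.sahara.username</name>
            <value>swiftuser</value>
        </property>
        <property>
            <name>fs.swift.service.sahara.password</name>
            <value>swiftpassword</value>
        </property>
    </configuration>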

Compiling
=========

To build the jar, add ``hadoop-core`` and ``commons-cli`` to the classpath.

On a node running Ubuntu 13.04 with Hadoop 1.2.1, the following commands
will compile ``WordCount.java`` from within the ``src`` directory::

    $ mkdir wordcount_classes
    $ javac -classpath /usr/share/hadoop/hadoop-core-1.2.1.jar:/usr/share/hadoop/lib/commons-cli-1.2.jar -d wordcount_classes WordCount.java
    $ jar -cvf edp-java.jar -C wordcount_classes/ .

(A compiled ``edp-java.jar`` is included in ``wordcount/lib``. Replace it if you rebuild.)

Note that on a node with Hadoop 2.3.0, the ``javac`` command above can be replaced with::

    $ javac -classpath /opt/hadoop-2.3.0/share/hadoop/common/hadoop-common-2.3.0.jar:/opt/hadoop-2.3.0/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.3.0.jar:/opt/hadoop-2.3.0/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop-2.3.0/share/hadoop/mapreduce/lib/hadoop-annotations-2.3.0.jar -d wordcount_classes WordCount.java
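
Whichever version you build against, you can sanity-check the jar by listing
its contents; ``WordCount.class`` and its inner classes should be present::

    $ jar -tf edp-java.jar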

Running from the command line with Oozie
========================================

The ``wordcount`` subdirectory contains a ``job.properties`` file, a ``workflow.xml`` file,
and a ``lib`` directory with an ``edp-java.jar`` compiled as above.

To run this example from Oozie, you will need to modify the ``job.properties`` file
to specify the correct ``jobTracker`` and ``nameNode`` addresses for your cluster.
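
For example, if your cluster's master node were at ``10.0.0.5`` (an
illustrative address; the ports below are the ones used in the stock
``job.properties``), the relevant lines would be::

    nameNode=hdfs://10.0.0.5:8020
    jobTracker=10.0.0.5:8021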

You will also need to modify the ``workflow.xml`` file to contain the correct input
and output paths. These paths may be Sahara swift URLs or HDFS paths. If swift
URLs are used, set the ``fs.swift.service.sahara.username`` and ``fs.swift.service.sahara.password``
properties in the ``<configuration>`` section.
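
In the example ``workflow.xml``, the input and output paths are passed to the
main class as ``<arg>`` elements; replace the container and object names with
your own::

    <main-class>org.openstack.sahara.examples.WordCount</main-class>
    <arg>swift://user.sahara/input</arg>
    <arg>swift://user.sahara/output</arg>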

1) Upload the ``wordcount`` directory to HDFS

   ``$ hadoop fs -put wordcount wordcount``

2) Launch the job, specifying the correct Oozie server and port

   ``$ oozie job -oozie http://oozie_server:port/oozie -config wordcount/job.properties -run``

3) Don't forget to create your swift input path! A Sahara swift URL looks like
   *swift://container.sahara/object* (one way to create the object is sketched
   below)
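
As a sketch, assuming the ``python-swiftclient`` CLI is installed and your
OpenStack credentials are exported in the environment, the container and input
object could be created like this (names are illustrative)::

    $ swift post container
    $ swift upload container input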

Running from the Sahara UI
==========================

Running the WordCount example from the Sahara UI is very similar to running a Pig, Hive,
or MapReduce job.

1) Create a job binary that points to the ``edp-java.jar`` file
2) Create a ``Java`` job type and add the job binary to the ``libs`` value
3) Launch the job:

   a) Add the input and output paths to ``args``
   b) If swift input or output paths are used, set the ``fs.swift.service.sahara.username``
      and ``fs.swift.service.sahara.password`` configuration values (see the sketch
      after this list)
   c) The Sahara UI will prompt for the required ``main_class`` value and the
      optional ``java_opts`` value
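
Under the hood, these values are collected into the job's EDP configuration.
Conceptually, the resulting payload resembles the following sketch of sahara's
``job_configs`` structure (values are illustrative; consult the sahara EDP API
documentation for the exact schema)::

    {
        "configs": {
            "fs.swift.service.sahara.username": "swiftuser",
            "fs.swift.service.sahara.password": "swiftpassword"
        },
        "args": ["swift://container.sahara/input",
                 "swift://container.sahara/output"]
    }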

@@ -1,2 +0,0 @@
This product includes software developed by The Apache Software
Foundation (http://www.apache.org/).

@@ -1,95 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.openstack.sahara.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }

    // ---- Begin modifications for EDP ----
    // This will add properties from the <configuration> tag specified
    // in the Oozie workflow. For Java actions, Oozie writes the
    // configuration values to a file pointed to by oozie.action.conf.xml
    conf.addResource(new Path("file:///",
                              System.getProperty("oozie.action.conf.xml")));
    // ---- End modifications for EDP ----

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

@@ -1,23 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

nameNode=hdfs://1.2.3.4:8020
jobTracker=1.2.3.4:8021
queueName=default

oozie.wf.application.path=${nameNode}/user/${user.name}/wordcount
Binary file not shown.

@@ -1,49 +0,0 @@
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<workflow-app xmlns="uri:oozie:workflow:0.2" name="java-main-wf">
    <start to="java-node"/>
    <action name="java-node">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>fs.swift.service.sahara.username</name>
                    <value>swiftuser</value>
                </property>
                <property>
                    <name>fs.swift.service.sahara.password</name>
                    <value>swiftpassword</value>
                </property>
            </configuration>
            <main-class>org.openstack.sahara.examples.WordCount</main-class>
            <arg>swift://user.sahara/input</arg>
            <arg>swift://user.sahara/output</arg>
        </java>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Java failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>

@@ -1,4 +0,0 @@
Example Pig job
===============

This script trims spaces in the input text (a sample local run is sketched
after the script below).

@@ -1,3 +0,0 @@
A = load '$INPUT' using PigStorage(':') as (fruit: chararray);
B = foreach A generate com.hadoopbook.pig.Trim(fruit);
store B into '$OUTPUT' USING PigStorage();
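
Note that ``com.hadoopbook.pig.Trim`` is a UDF from an external jar (the
``com.hadoopbook`` package comes from the "Hadoop: The Definitive Guide"
example code), so the jar must be on Pig's classpath or registered in the
script. Assuming that, and with illustrative file names, a local run would
look like::

    $ pig -x local -param INPUT=input -param OUTPUT=output_dir example.pig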

@@ -1,4 +0,0 @@
pomegranate
banana
apple
lychee

@@ -1,4 +0,0 @@
pomegranate
banana
apple
lychee
Binary file not shown.