Servlets Hadoop输出值无法从web服务访问
我用Hadoop和Web服务开发了一个程序。我创建了三个hadoop类:驱动程序、映射程序和归约程序。另一个类是servlet。我从servlet调用hadoop驱动程序类,hadoop可以正常工作。但是我想从hadoop的输出文件夹中读取文本,我该怎么做呢?因为hadoop的输出文件夹在servlet运行期间不存在,只有在作业完成后才会出现。因此,我无法访问output文件夹中part-r-00000文件里的文本。 我把这四个类放在同一个包里：Myproject>src>sourcecode>Driver、Mapper、Reducer、Servlet。输入文本文件位于WEB-INF中。 下面是我的驱动程序类。
package sourcecode;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the "ALABAMA" census MapReduce job.
 *
 * Configures input/output paths, the mapper/reducer classes and the output
 * key/value types, then runs the job synchronously. The job is launched from
 * the constructor so that the Readtxtfile servlet can simply instantiate
 * this class and block until the job is done.
 */
public class ALDriver {

	/** Fixed input file (census records); lives under the webapp's WEB-INF. */
	private static final String INPUT_PATH =
			"/home/hadoop/HadoopEclipseWorkspace/WebTest2/WebContent/WEB-INF/al001012010.txt";

	/** Job output directory; must not exist before the job starts. */
	private static final String OUTPUT_PATH =
			"/home/hadoop/HadoopEclipseWorkspace/WebTest2/WebContent/WEB-INF/output";

	/**
	 * Builds and runs the MapReduce job, blocking until it completes.
	 *
	 * @throws IOException            if job setup or execution I/O fails
	 * @throws ClassNotFoundException if a job class cannot be resolved
	 * @throws InterruptedException   if the wait for completion is interrupted
	 */
	public ALDriver() throws IOException, ClassNotFoundException,
			InterruptedException {
		long startTime = System.currentTimeMillis();

		// Job.getInstance() is the supported factory; "new Job()" is deprecated.
		Job job = Job.getInstance();
		job.setJarByClass(ALDriver.class);
		job.setJobName("ALABAMA");

		// Default input format (TextInputFormat): each record is one line.
		FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
		FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

		job.setMapperClass(ALMapper.class);
		job.setReducerClass(ALReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// BUG FIX: the original computed the elapsed time BEFORE calling
		// waitForCompletion(), so "Total time" was always ~0 ms. The job must
		// finish first, then we measure.
		boolean success = job.waitForCompletion(true);

		long elapsedTime = System.currentTimeMillis() - startTime;
		System.out.println("Total time : " + elapsedTime);

		// BUG FIX: the original ignored the job's success flag entirely;
		// at least make a failed run visible to the caller's logs.
		if (!success) {
			System.err.println("MapReduce job ALABAMA did not complete successfully");
		}
	}

	/**
	 * Command-line entry point: runs the job once.
	 *
	 * @param args unused
	 * @throws Exception if the job fails to set up or run
	 */
	public static void main(String[] args) throws Exception {
		new ALDriver();
	}
}
这是mapper类
package sourcecode;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper for the census job: extracts a record-number key and a population
 * value from each fixed-width input line and emits (recordNumber, population).
 *
 * NOTE(review): field offsets below are taken from the original code; they
 * assume the fixed-width layout of the al001012010.txt census file — confirm
 * against the actual record format.
 */
public class ALMapper extends
		Mapper<LongWritable, Text, Text, IntWritable> {

	// Fixed-width field offsets (were inline magic numbers 16/23/24).
	private static final int RECORD_NUMBER_START = 16;
	private static final int RECORD_NUMBER_END = 23;
	private static final int POPULATION_START = 24;

	/**
	 * Parses one input line and emits its record number and population.
	 * Malformed records (too short, or a non-numeric population field) are
	 * skipped instead of killing the task.
	 *
	 * @param key     byte offset of the line in the input file (unused)
	 * @param value   one line of the census file
	 * @param context Hadoop context used to emit the (key, value) pair
	 */
	@Override
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString();

		// BUG FIX: the original called substring()/parseInt() unguarded, so a
		// single short or malformed line threw StringIndexOutOfBoundsException
		// or NumberFormatException and failed the entire map task.
		if (line.length() <= POPULATION_START) {
			return;
		}

		String recordNumber = line.substring(RECORD_NUMBER_START, RECORD_NUMBER_END);
		int population;
		try {
			population = Integer.parseInt(line.substring(POPULATION_START).trim());
		} catch (NumberFormatException e) {
			return; // skip records whose population field is not a number
		}

		context.write(new Text(recordNumber), new IntWritable(population));
	}
}
包源代码;
导入java.io.IOException;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.LongWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Mapper;
公共类ALMapper扩展
制图员{
缺少专用静态最终整数=9999;
@凌驾
公共void映射(可长写键、文本值、上下文)
抛出IOException、InterruptedException{
字符串行=value.toString();
String recordnumber=line.substring(16,23);
int population=Integer.parseInt(line.substring(24));
write(新文本(记录编号),新IntWritable(填充));
}
}
这是减速器类
package sourcecode;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer for the census job: emits every (key, population) pair whose
 * population exceeds a fixed threshold, and additionally appends each such
 * pair to a flat report file that the Readtxtfile servlet reads back.
 */
public class ALReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

	// Renamed from the misleading "maxValue": it is never updated, it is a
	// fixed cutoff — only populations strictly greater than this are emitted.
	private static final int POPULATION_THRESHOLD = 150000;

	// Side-channel report file consumed by the servlet.
	// NOTE(review): writing to a local filesystem path from a reducer only
	// works when the reducer runs on this machine (local/single-node mode) —
	// confirm the deployment before relying on it.
	private static final String REPORT_FILE =
			"/home/hadoop/HadoopEclipseWorkspace/WebTest2/WebContent/WEB-INF/outputtxtfile.txt";

	/**
	 * Writes every value above the threshold both to the job output and,
	 * append-mode, to the servlet-visible report file.
	 *
	 * @param key     record-number key from the mapper
	 * @param values  all populations emitted for this key
	 * @param context Hadoop context used to emit qualifying pairs
	 */
	@Override
	public void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		for (IntWritable value : values) {
			int population = value.get();
			if (population > POPULATION_THRESHOLD) {
				context.write(key, new IntWritable(population));
				try (PrintWriter out = new PrintWriter(new BufferedWriter(
						new FileWriter(REPORT_FILE, true)))) {
					out.println(key + " = " + population);
				} catch (IOException e) {
					// BUG FIX: the original swallowed this exception silently,
					// making a missing report file impossible to diagnose.
					e.printStackTrace();
				}
			}
		}
	}
}
包源代码;
导入java.io.BufferedWriter;
导入java.io.FileWriter;
导入java.io.IOException;
导入java.io.PrintWriter;
导入org.apache.hadoop.io.IntWritable;
导入org.apache.hadoop.io.Text;
导入org.apache.hadoop.mapreduce.Reducer;
公共类ALReducer扩展了Reducer{
@凌驾
公共void reduce(文本键、Iterable值、上下文)
抛出IOException、InterruptedException{
int最大值=150000;
对于(可写入值:值){
if(value.get()>maxValue){
//maxValue=value.get();
write(key,newintwriteable(value.get());
//System.out.println(“输出:“+key+”和“+value.get()”);
try(PrintWriter out=new PrintWriter(new BufferedWriter(new FileWriter)(“/home/hadoop/HadoopEclipseWorkspace/WebTest2/WebContent/WEB-INF/outputxtfile.txt”,true))){
out.println(key+“=”+value.get());
}捕获(IOE异常){
//异常处理留给读者作为练习
}
}
}
//System.out.println(“输出:“+key+”和“+maxValue”);
}
}
这就是servlet,我在这里调用Hadoop驱动程序类
package sourcecode;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.eclipse.core.internal.refresh.RefreshJob;
/**
 * Servlet implementation class Readtxtfile.
 *
 * On GET: ensures the reducer's report file exists, runs the Hadoop job
 * synchronously via {@link ALDriver}, then streams the report file back to
 * the browser as an HTML page.
 */
@WebServlet("/Readtxtfile")
public class Readtxtfile extends HttpServlet {
	private static final long serialVersionUID = 1L;

	// Absolute path of the flat file ALReducer appends its results to.
	private static final String REPORT_FILE =
			"/home/hadoop/HadoopEclipseWorkspace/WebTest2/WebContent/WEB-INF/outputtxtfile.txt";

	boolean receive = false;

	/** Default constructor. */
	public Readtxtfile() {
		super();
	}

	/**
	 * Runs the MapReduce job and renders the report file as HTML.
	 *
	 * @param request  incoming HTTP request (unused)
	 * @param response response the HTML page is written to
	 * @throws ServletException on servlet failure
	 * @throws IOException      on I/O failure while writing the response
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response)
			throws ServletException, IOException {
		response.setContentType("text/html");

		// Ensure the report file exists so getResourceAsStream() below does
		// not return null on the very first request.
		try {
			File file = new File(REPORT_FILE);
			if (file.createNewFile()) {
				System.out.println("File is created!");
			} else {
				System.out.println("File already exists.");
			}
		} catch (IOException e) {
			e.printStackTrace();
		}

		// Run the Hadoop job; ALDriver's constructor blocks until completion,
		// so by the time it returns the reducer has appended its results.
		try {
			new ALDriver();
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Restore the interrupt flag rather than swallowing it.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}

		PrintWriter out = response.getWriter();
		out.println("<html>");
		out.println("<head><title> Census Data Test </title></head>");
		out.println("<body bgColor=\"Green\" >");
		out.println("<h1>Census Data Analytics with MapReduce </h1>");

		// Read the report file via the servlet context (webapp-relative path).
		ServletContext context = getServletContext();
		String filename = "/WEB-INF/outputtxtfile.txt";
		InputStream is = context.getResourceAsStream(filename);
		if (is != null) {
			// BUG FIX: try-with-resources — the original never closed the
			// stream or reader, leaking a file handle per request.
			try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
				String text;
				while ((text = reader.readLine()) != null) {
					out.println("<p>" + text);
				}
			}
		}

		// BUG FIX: the original never closed the HTML document.
		out.println("</body></html>");
		System.out.println("Done!");
	}

	/** POST is not supported; intentionally a no-op. */
	protected void doPost(HttpServletRequest request,
			HttpServletResponse response) throws ServletException, IOException {
	}
}
包源代码;
导入java.io.BufferedReader;
导入java.io.File;
导入java.io.IOException;
导入java.io.InputStream;
导入java.io.InputStreamReader;
导入java.io.PrintWriter;
导入java.nio.file.Path;
导入java.nio.file.path;
导入javax.servlet.ServletContext;
导入javax.servlet.ServletException;
导入javax.servlet.annotation.WebServlet;
导入javax.servlet.http.HttpServlet;
导入javax.servlet.http.HttpServletRequest;
导入javax.servlet.http.HttpServletResponse;
导入org.eclipse.core.internal.refresh.RefreshJob;
/**
*Servlet实现类Readtxtfile
*/
@WebServlet(“/Readtxtfile”)
公共类Readtxtfile扩展了HttpServlet{
私有静态最终长serialVersionUID=1L;
布尔接收=假;
/**
*@抛出异常
*@参见HttpServlet#HttpServlet()
*/
公共Readtxtfile(){
超级();
//TODO自动生成的构造函数存根
}
/**
*@参见HttpServlet#doGet(HttpServletRequest请求,HttpServletResponse响应)
*/
受保护的void doGet(HttpServletRequest请求,HttpServletResponse响应)抛出ServletException,IOException{
response.setContentType(“text/html”);
试一试{
File File=new文件(“/home/hadoop/HadoopEclipseWorkspace/WebTest2/WebContent/WEB-INF/outputxtfile.txt”);
if(file.createNewFile()){
System.out.println(“文件已创建!”);
}否则{
System.out.println(“文件已经存在”);
}
}捕获(IOE异常){
e、 printStackTrace();
}
//CallingHadoop c=新CallingHadoop();
字符串release=“/WEB-INF/output”;
Path=Path.get(release);
//receive=c.callHadoop();
试一试{
ALDriver a=新ALDriver();
}catch(classnotfounde异常){
//TODO自动生成的捕捉块
e、 printStackTrace();
}捕捉(中断异常e){
//TODO自动生成的捕捉块
e、 printStackTrace();
}
//
//我们将读取一个名为configuration.properties的文件
//文件放在WEB-INF目录下。
//
//ResourcesPlugin.getWorkspace().getRoot().getProjects();
//ResourcesPlugin.getPlugin().getPluginPreferences().setValue(ResourcesPlugin.PREF_AUTO_REFRESH,true);
//if(Files.exists(path,LinkOption.NOFOLLOW_LINKS)){
PrintWriter out=response.getWriter();
out.println(“”);
out.println(“人口普查数据测试”);
out.println(“”);
out.println(“使用MapReduce进行普查数据分析”);
//字符串filename=“/home/hadoop/HadoopEclipseWorkspace/WebTest2/output/part-r-00000”;
ServletContext=getServletContext();
//
//首先使用ServletContext.getResourceAsStream()获取文件InputStream
//方法。
//
字符串filename=“/WEB-INF/outputXTFile.txt”;
InputStream=context.getResourceAsStream(文件名);
如果(is!=null){
InputStreamReader isr=新的InputStreamReader(is);
缓冲读取器=