Hadoop Spark Streaming CustomReceiver未知主机异常
我是 Spark Streaming 的新手。我想在线流式传输url,以便从某个url检索信息,我使用JavaCustomReceiver流式传输url 这是我正在使用的代码()Hadoop Spark Streaming CustomReceiver未知主机异常,hadoop,apache-spark,spark-streaming,bigdata,Hadoop,Apache Spark,Spark Streaming,Bigdata,我是 Spark Streaming 的新手。我想在线流式传输url,以便从某个url检索信息,我使用JavaCustomReceiver流式传输url 这是我正在使用的代码() 公共类JavaCustomReceiver扩展了Receiver{ 私有静态最终模式空间=Pattern.compile(“”); 公共静态void main(字符串[]args)引发异常{ SparkConf SparkConf=新的SparkConf().setAppName(“JavaCustomReceiver”); JavaStr
公共类JavaCustomReceiver扩展了Receiver{
私有静态最终模式空间=Pattern.compile(“”);
公共静态void main(字符串[]args)引发异常{
SparkConf SparkConf=新的SparkConf().setAppName(“JavaCustomReceiver”);
JavaStreamingContext ssc=新的JavaStreamingContext(sparkConf,新的持续时间(1000));
JavaReceiverInputDStream行=ssc.receiverStream(
新的JavaCustomReceiver(“http://stream.meetup.com/2/rsvps", 80));
JavaDStream words=lines.flatMap(新
FlatMapFunction(){
@凌驾
公共迭代器调用(字符串x){
返回Arrays.asList(SPACE.split(x)).iterator();
}
});
JavaPairDStream wordCounts=words.mapToPair(
新PairFunction(){
@凌驾
公共元组2调用(字符串s){
返回新的Tuple2(s,1);
}
}).reduceByKey(新功能2(){
@凌驾
公共整数调用(整数i1、整数i2){
返回i1+i2;
}
});
wordCounts.print();
ssc.start();
ssc.终止();
}
字符串host=null;
int端口=-1;
公共JavaCustomReceiver(字符串主机、int端口){
super(StorageLevel.MEMORY_和_DISK_2());
主机=主机;
端口=端口;
}
public void onStart(){
新线程(){
@凌驾
公开募捐{
接收();
}
}.start();
}
公共void onStop(){
}
私有无效接收(){
试一试{
套接字=空;
BufferedReader reader=null;
字符串userInput=null;
试一试{
//连接到服务器
套接字=新套接字(主机、端口);
读卡器=新的BufferedReader(
新的InputStreamReader(socket.getInputStream(),StandardCharsets.UTF_8));
//直到停止或连接断开,继续读取
而(!isStopped()&&(userInput=reader.readLine())!=null){
System.out.println(“接收到的数据“+”用户输入“+””);
存储(用户输入);
}
}最后{
Closeables.close(读卡器,/*异常=*/true);
Closeables.close(套接字,/*异常=*/true);
}
重新启动(“尝试再次连接”);
}捕获(连接异常){
//如果无法连接到服务器,请重新启动
重新启动(“无法连接”,ce);
}捕获(可丢弃的t){
重新启动(“接收数据错误”,t);
}
}
}
然而,我不断得到一个 java.net.UnknownHostException。
我怎样才能解决这个问题?我使用的代码有什么问题?
阅读引用的自定义接收器的代码后,很明显,它是连接到主机:端口的TCP接收器,而不是可以接收URL的HTTP接收器。您必须将代码更改为从HTTP端点读取。读取引用的自定义接收器的代码后,很明显,它是一个连接到主机:端口的TCP接收器,而不是可以获取URL的HTTP接收器。您必须将代码更改为从HTTP端点读取。
@maasg 您是对的。我把它改为使用 URL 和 openStream(),而不是 sockets,它工作了,我能够得到数据!!
/**
 * Spark Streaming receiver that reads UTF-8 text lines from a remote source and
 * stores each line into Spark.
 *
 * <p>Two kinds of source are supported, selected by the {@code host_} constructor
 * argument:
 * <ul>
 *   <li>a plain host name or IP address: a raw TCP connection is opened to
 *       {@code host:port};</li>
 *   <li>a string starting with {@code http://} or {@code https://}: the string is
 *       treated as a URL and its response body is read. Passing such a string to
 *       {@link Socket} would make the JVM try to resolve the whole URL as a host
 *       name and fail with {@code java.net.UnknownHostException}.</li>
 * </ul>
 */
public class JavaCustomReceiver extends Receiver<String> {
  private static final Pattern SPACE = Pattern.compile(" ");

  /**
   * Runs a word count over the lines received from the Meetup RSVP stream and
   * prints the counts for every one-second batch.
   *
   * @param args unused
   * @throws Exception if the streaming context fails or is interrupted
   */
  public static void main(String[] args) throws Exception {
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
    JavaReceiverInputDStream<String> lines = ssc.receiverStream(
        new JavaCustomReceiver("http://stream.meetup.com/2/rsvps", 80));
    JavaDStream<String> words = lines.flatMap(
        new FlatMapFunction<String, String>() {
          @Override
          public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
          }
        });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
        new PairFunction<String, String, Integer>() {
          @Override
          public Tuple2<String, Integer> call(String s) {
            return new Tuple2<>(s, 1);
          }
        }).reduceByKey(new Function2<Integer, Integer, Integer>() {
          @Override
          public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
          }
        });
    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }

  String host = null;
  int port = -1;

  /**
   * Creates a receiver for the given source.
   *
   * @param host_ either a host name / IP address for a raw TCP connection, or an
   *              {@code http://} / {@code https://} URL to read from
   * @param port_ TCP port used when {@code host_} is a plain host; ignored when
   *              {@code host_} is a URL (the URL's own port or the scheme default
   *              applies)
   */
  public JavaCustomReceiver(String host_, int port_) {
    super(StorageLevel.MEMORY_AND_DISK_2());
    host = host_;
    port = port_;
  }

  /** Starts a new thread that runs {@link #receive()}, so this method returns immediately. */
  @Override
  public void onStart() {
    new Thread() {
      @Override
      public void run() {
        receive();
      }
    }.start();
  }

  /**
   * Nothing to do here: the loop in {@link #receive()} checks {@code isStopped()}
   * and closes its own resources when it exits.
   */
  @Override
  public void onStop() {
  }

  /**
   * Returns whether {@code source} looks like an HTTP(S) URL rather than a bare host name.
   * The scheme comparison is case-insensitive.
   */
  private static boolean isHttpUrl(String source) {
    return source != null
        && (source.regionMatches(true, 0, "http://", 0, 7)
            || source.regionMatches(true, 0, "https://", 0, 8));
  }

  /**
   * Connects to the configured source, stores every received line, and asks for a
   * restart when the stream ends or an error occurs.
   */
  private void receive() {
    try {
      Socket socket = null;
      java.io.InputStream in = null;
      BufferedReader reader = null;
      String userInput = null;
      try {
        if (isHttpUrl(host)) {
          // A URL cannot be handed to Socket: it would be resolved as a host name
          // and fail with UnknownHostException. Read it as an HTTP resource instead.
          in = java.net.URI.create(host).toURL().openStream();
        } else {
          // connect to the server
          socket = new Socket(host, port);
          in = socket.getInputStream();
        }
        reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
        // Until stopped or connection broken continue reading
        while (!isStopped() && (userInput = reader.readLine()) != null) {
          System.out.println("Received data '" + userInput + "'");
          store(userInput);
        }
      } finally {
        Closeables.close(reader, /* swallowIOException = */ true);
        // Also close the raw stream in case the reader was never created.
        Closeables.close(in, /* swallowIOException = */ true);
        Closeables.close(socket, /* swallowIOException = */ true);
      }
      restart("Trying to connect again");
    } catch (ConnectException ce) {
      // restart if could not connect to server
      restart("Could not connect", ce);
    } catch (Throwable t) {
      restart("Error receiving data", t);
    }
  }
}