Java Spark Streaming rawSocketStream
我正在尝试Spark Streaming并监听套接字,我正在使用rawSocketStream方法创建接收器和数据流。但是当我打印数据流时,我得到了下面的异常 创建数据流的代码: Stacktrace如下所示: 工作代码 通过TCP发送protobuf对象的代码Java Spark Streaming rawSocketStream,java,apache-spark,protocol-buffers,spark-streaming,Java,Apache Spark,Protocol Buffers,Spark Streaming,我正在尝试Spark Streaming并监听套接字,我正在使用rawSocketStream方法创建接收器和数据流。但是当我打印数据流时,我得到了下面的异常 创建数据流的代码: Stacktrace如下所示: 工作代码 通过TCP发送protobuf对象的代码 ServerSocket serverSocket = new ServerSocket(9999); log.info(tracePrefix + "Waiting for connections ...");
ServerSocket serverSocket = new ServerSocket(9999);
log.info(tracePrefix + "Waiting for connections ...");
Socket s1 = serverSocket.accept();
log.info(tracePrefix + "Accepted a connection ...");
while(true) {
Thread.sleep(3000);
DataOutputStream out = new DataOutputStream(s1.getOutputStream());
byte[] bytes = book.toByteArray();
log.info(tracePrefix + "Serialized size: " + book.getSerializedSize());
out.writeInt(book.getSerializedSize());
log.info(tracePrefix + "Sending bytes: " + Arrays.toString(bytes));
out.write(bytes);
// out.write("hello world !".getBytes());
out.flush();
log.info(tracePrefix + "Written to new socket");
}
创建接收器和数据流的代码
// Build the custom receiver for localhost:9999 and register it with the streaming context.
JavaSocketReceiver socketReceiver = new JavaSocketReceiver("localhost", 9999);
JavaReceiverInputDStream<GeneratedMessage> rawStream = jssc.receiverStream(socketReceiver);
log.info(tracePrefix + "Created the stream ...");
// Print the contents of the stream.
rawStream.print();
private static class JavaSocketReceiver extends Receiver<GeneratedMessage> {
/**
*
*/
private static final long serialVersionUID = -958378677169958045L;
String host = null;
int port = -1;
JavaSocketReceiver(String host_, int port_) {
super(StorageLevel.MEMORY_AND_DISK());
host = host_;
port = port_;
}
@Override
public void onStart() {
new Thread() {
@Override
public void run() {
receive();
}
}.start();
}
@Override
public void onStop() {
}
private void receive() {
try {
Socket socket = null;
ObjectInputStream in = null;
try {
// Open a socket to the target address and keep reading from
// it
log.info(tracePrefix + "Connecting to " + host + ":" + port);
SocketChannel channel = SocketChannel.open();
channel.configureBlocking(true);
channel.connect(new InetSocketAddress(host, port));
log.info(tracePrefix + "Connected to " + host + ":" + port);
ArrayBlockingQueue<ByteBuffer> queue = new ArrayBlockingQueue<>(2);
Thread blockPushingThread = new Thread(new Runnable() {
@Override
public void run() {
int nextBlockNumber = 0;
while (true) {
try {
ByteBuffer buffer = queue.take();
nextBlockNumber += 1;
AddressBook book = AddressBook.parseFrom(buffer.array());
// log.info(tracePrefix + "Got back the object: " + book);
store(book);
} catch (InterruptedException ie) {
log.error(tracePrefix + "Failed processing data", ie);
} catch (Throwable t) {
log.error(tracePrefix + "Failed processing data", t);
}
}
}
});
blockPushingThread.setDaemon(true);
blockPushingThread.start();
ByteBuffer lengthBuffer = ByteBuffer.allocate(4);
while (true) {
lengthBuffer.clear();
readFully(channel, lengthBuffer);
lengthBuffer.flip();
int length = lengthBuffer.getInt();
// log.info(tracePrefix + "The length read: " + length);
ByteBuffer dataBuffer = ByteBuffer.allocate(length);
readFully(channel, dataBuffer);
dataBuffer.flip();
// log.info(tracePrefix + "Read a block with " + length + " bytes");
queue.put(dataBuffer);
}
} finally {
Closeables.close(in, /* swallowIOException = */ true);
Closeables.close(socket, /* swallowIOException = */ true);
}
} catch (ConnectException ce) {
ce.printStackTrace();
restart("Could not connect", ce);
} catch (Throwable t) {
t.printStackTrace();
restart("Error receiving data", t);
}
}
private void readFully(ReadableByteChannel channel, ByteBuffer dest) {
while (dest.position() < dest.limit()) {
try {
if (channel.read(dest) == -1) {
throw new EOFException("End of channel");
}
} catch (IOException e) {
log.error(tracePrefix + "Failed reading from channel: " + channel, e);
}
}
}
}
JavaReceiverInputDStream rawStream=jssc.receiverStream(新的JavaSocketReceiver(“localhost”,9999));
log.info(tracePrefix+“创建了流…”);
print();
私有静态类JavaSocketReceiver扩展了Receiver{
/**
*
*/
私有静态最终长serialVersionUID=-958378677169958045L;
字符串host=null;
int端口=-1;
JavaSocketReceiver(字符串主机,int端口){
超级(StorageLevel.MEMORY_和_DISK());
主机=主机;
端口=端口;
}
@凌驾
public void onStart(){
新线程(){
@凌驾
公开募捐{
接收();
}
}.start();
}
@凌驾
公共void onStop(){
}
私有无效接收(){
试一试{
套接字=空;
ObjectInputStream in=null;
试试{
//打开一个到目标地址的套接字,并继续从
//它
log.info(tracePrefix+“连接到”+主机+:“+端口);
SocketChannel通道=SocketChannel.open();
channel.configureBlocking(真);
connect(新的InetSocketAddress(主机、端口));
log.info(tracePrefix+”连接到“+主机+”:“+端口);
ArrayBlockingQueue=新的ArrayBlockingQueue(2);
Thread blockPushingThread=新线程(new Runnable()){
@凌驾
公开募捐{
int nextBlockNumber=0;
while(true){
试一试{
ByteBuffer buffer=queue.take();
nextBlockNumber+=1;
AddressBook=AddressBook.parseFrom(buffer.array());
//log.info(tracePrefix+“获取了对象:”+book);
商店(书);
}捕获(中断异常ie){
日志错误(tracePrefix+“处理数据失败”,即);
}捕获(可丢弃的t){
日志错误(tracePrefix+“处理数据失败”,t);
}
}
}
});
blockPushingThread.setDaemon(true);
blockPushingThread.start();
ByteBuffer lengthBuffer=ByteBuffer.allocate(4);
while(true){
lengthBuffer.clear();
准备就绪(通道、长度缓冲区);
lengthBuffer.flip();
int length=lengthBuffer.getInt();
//log.info(tracePrefix+“长度读取:”+length);
ByteBuffer dataBuffer=ByteBuffer.allocate(长度);
准备就绪(通道、数据缓冲);
dataBuffer.flip();
//log.info(tracePrefix+“读取带“+长度+”字节的块”);
queue.put(dataBuffer);
}
}最后{
Closeables.close(in,/*异常=*/true);
Closeables.close(套接字,/*异常=*/true);
}
}捕获(连接异常){
printStackTrace();
重新启动(“无法连接”,ce);
}捕获(可丢弃的t){
t、 printStackTrace();
重新启动(“接收数据错误”,t);
}
}
私有void已就绪(可由Techannel频道读取,由Tebuffer dest读取){
while(dest.position()
上面的JavaSocketReceiver取自Spark Streaming模块中rawSocketStream的实现。在发送字节的客户端代码中,如果我将DataOutputStream更改为ObjectOutputStream,则会出现流头损坏(corrupted header)异常;在流式代码中,如果我使用内置的rawSocketStream侦听传入的数据包,则会出现ByteBuffer(334)处的IllegalArgumentException。
如果我们查看ByteBuffer.allocate的文档,IllegalArgumentException只能由尝试分配负的缓冲区大小引起。
RawInputDStream协议需要一个整数大小字段,后跟相应的有效负载。该字段是一个4字节(32位)整数。
问题中显示的发送方程序:
out.writeByte(book.getSerializedSize());
正在将整数大小写入一个字节。因此,当读取端尝试解码有效负载大小时,它将读取一个损坏的字段,因为它将该字节与有效负载的某些信息相结合,解码后会产生一个负整数
解决方案应该是写入一个4字节(32位)整数:
out.writeInt(book.getSerializedSize());
我一直在使用JavaStreamingContext类的rawSocketStream方法研究类似的问题。就我而言,我
// Build the custom receiver for localhost:9999 and register it with the streaming context.
JavaSocketReceiver socketReceiver = new JavaSocketReceiver("localhost", 9999);
JavaReceiverInputDStream<GeneratedMessage> rawStream = jssc.receiverStream(socketReceiver);
log.info(tracePrefix + "Created the stream ...");
// Print the contents of the stream.
rawStream.print();
private static class JavaSocketReceiver extends Receiver<GeneratedMessage> {
/**
*
*/
private static final long serialVersionUID = -958378677169958045L;
String host = null;
int port = -1;
JavaSocketReceiver(String host_, int port_) {
super(StorageLevel.MEMORY_AND_DISK());
host = host_;
port = port_;
}
@Override
public void onStart() {
new Thread() {
@Override
public void run() {
receive();
}
}.start();
}
@Override
public void onStop() {
}
private void receive() {
try {
Socket socket = null;
ObjectInputStream in = null;
try {
// Open a socket to the target address and keep reading from
// it
log.info(tracePrefix + "Connecting to " + host + ":" + port);
SocketChannel channel = SocketChannel.open();
channel.configureBlocking(true);
channel.connect(new InetSocketAddress(host, port));
log.info(tracePrefix + "Connected to " + host + ":" + port);
ArrayBlockingQueue<ByteBuffer> queue = new ArrayBlockingQueue<>(2);
Thread blockPushingThread = new Thread(new Runnable() {
@Override
public void run() {
int nextBlockNumber = 0;
while (true) {
try {
ByteBuffer buffer = queue.take();
nextBlockNumber += 1;
AddressBook book = AddressBook.parseFrom(buffer.array());
// log.info(tracePrefix + "Got back the object: " + book);
store(book);
} catch (InterruptedException ie) {
log.error(tracePrefix + "Failed processing data", ie);
} catch (Throwable t) {
log.error(tracePrefix + "Failed processing data", t);
}
}
}
});
blockPushingThread.setDaemon(true);
blockPushingThread.start();
ByteBuffer lengthBuffer = ByteBuffer.allocate(4);
while (true) {
lengthBuffer.clear();
readFully(channel, lengthBuffer);
lengthBuffer.flip();
int length = lengthBuffer.getInt();
// log.info(tracePrefix + "The length read: " + length);
ByteBuffer dataBuffer = ByteBuffer.allocate(length);
readFully(channel, dataBuffer);
dataBuffer.flip();
// log.info(tracePrefix + "Read a block with " + length + " bytes");
queue.put(dataBuffer);
}
} finally {
Closeables.close(in, /* swallowIOException = */ true);
Closeables.close(socket, /* swallowIOException = */ true);
}
} catch (ConnectException ce) {
ce.printStackTrace();
restart("Could not connect", ce);
} catch (Throwable t) {
t.printStackTrace();
restart("Error receiving data", t);
}
}
private void readFully(ReadableByteChannel channel, ByteBuffer dest) {
while (dest.position() < dest.limit()) {
try {
if (channel.read(dest) == -1) {
throw new EOFException("End of channel");
}
} catch (IOException e) {
log.error(tracePrefix + "Failed reading from channel: " + channel, e);
}
}
}
}
// Problematic variant from the question: the size is written as a single byte, so a reader
// that expects a 4-byte length prefix decodes a corrupted value.
// (Assumes `out` is a DataOutputStream, as in the sender code — confirm.)
out.writeByte(book.getSerializedSize());
// Corrected variant: the size is written as a 4-byte (32-bit) integer.
out.writeInt(book.getSerializedSize());
// Sender-side setup: listen on port 9999, block until one client connects, and take the
// output stream of that connection.
ServerSocket serverSocket = new ServerSocket(9999);
Socket clientSocket = serverSocket.accept();
OutputStream outputStream = clientSocket.getOutputStream();
// The object stream writes into the in-memory buffer `bos`, not directly to the socket.
ByteArrayOutputStream bos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(bos);
// Sender: serialize a byte[] into an in-memory buffer, then write it to the client socket
// as one frame made of a 4-byte size followed by the buffered bytes.
// All five resources are declared in the try header, so they are closed when the block exits.
try(ServerSocket serverSocket = new ServerSocket(9999);
Socket clientSocket = serverSocket.accept();
OutputStream outputStream = clientSocket.getOutputStream();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(bos);)
{
byte[] bytes;
// Load the byte[] with data
...
// Serialize into `bos`; flushing and closing the object stream first ensures every
// serialized byte is in the buffer before its size is read below.
oos.writeObject(bytes);
oos.flush();
oos.close();
// OutputStream.write(int) emits only the low-order 8 bits of its argument, so these four
// writes send bos.size() as a 4-byte value, most significant byte first.
outputStream.write(bos.size() >> 24);
outputStream.write(bos.size() >> 16);
outputStream.write(bos.size() >> 8);
outputStream.write(bos.size());
// Payload: the buffered bytes whose count was just announced.
outputStream.write(bos.toByteArray());
// Keep socket connections open
// NOTE(review): the sockets are try-with-resources resources and are closed as soon as this
// block ends — confirm how the connection is meant to stay open.
}
catch (IOException e) {
e.printStackTrace();
}
// Consumer side: configure the application name and master step by step.
SparkConf conf = new SparkConf();
conf.setAppName("log jamming");
conf.setMaster("local[2]");
// Streaming context with a one-second batch duration.
JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(1));
// Built-in raw socket stream reading from localhost:9999.
JavaReceiverInputDStream<byte[]> bytes = jsc.rawSocketStream("localhost", 9999);
// Have fun with the RDD
// Start the streaming computation and block until it terminates.
jsc.start();
jsc.awaitTermination();