Apache Spark:如何在 Spark 中用 Keras 模型实现实时推理?
下面是我的一段代码:
@pandas_udf(StringType())
def online_predict(values: pd.Series) -> pd.Series:
    """Score one Arrow batch of raw Kafka records with the broadcast Keras model.

    Parameters
    ----------
    values : pd.Series
        Raw values from the Kafka 'value' column.
        # assumes preprocessing() accepts each element as-is — TODO confirm

    Returns
    -------
    pd.Series
        The L2 norm of each prediction row, rendered as a str.
    """
    # Rebuilding the model from config and re-applying the broadcast
    # weights on EVERY batch is expensive. Cache the model as a function
    # attribute so each executor worker process builds it only once.
    if not hasattr(online_predict, "_model"):
        model = Model.from_config(bc_config.value)
        model.set_weights(bc_weights.value)
        online_predict._model = model
    pred = online_predict._model
    # batch_size is a module-level name defined elsewhere in the file.
    ds = tf.data.Dataset.from_tensor_slices(values)
    ds = ds.map(preprocessing).batch(batch_size)
    res = pred.predict(ds)
    # Collapse each prediction row to its Euclidean (L2) norm.
    res = tf.norm(res, axis=1)
    res = tf.strings.as_string(res).numpy()
    # as_string().numpy() yields a bytes array; decode so the returned
    # Series holds real str values matching the declared StringType().
    return pd.Series(res).str.decode("utf-8")
# Local Spark session; "local[*]" uses every core on this machine.
spark = SparkSession.builder.appName(
'spark_tf').master("local[*]").getOrCreate()
# Model weights/config were exported ahead of time as .npy files.
# allow_pickle=True is required because they hold Python objects —
# only load files from a trusted source (pickle can execute code).
weights = np.load('./ext/weights.npy', allow_pickle=True)
config = np.load('./ext/config.npy', allow_pickle=True).item()
# Broadcast once so every executor can rebuild the model locally
# instead of shipping the weights with each task. online_predict
# (defined above) closes over bc_weights/bc_config by name.
bc_weights = spark.sparkContext.broadcast(weights)
bc_config = spark.sparkContext.broadcast(config)
# Structured Streaming source: Kafka topic 'dlpred' on localhost.
stream = spark.readStream.format('kafka') \
.option('kafka.bootstrap.servers', 'localhost:9092') \
.option('subscribe', 'dlpred') \
.load()
# Run inference on each record's raw bytes; the output column must be
# named 'value' for the Kafka sink below to pick it up.
stream = stream.select(online_predict(col('value')).alias('value'))
# Sink: write predictions back to Kafka topic 'dltest'.
# checkpointLocation enables restart/recovery of the streaming query.
x = stream.writeStream \
.format('kafka') \
.option("kafka.bootstrap.servers", 'localhost:9092') \
.option('topic', 'dltest') \
.option('checkpointLocation', './kafka_checkpoint') \
.start()
# Block the driver until the streaming query stops or fails.
x.awaitTermination()
因此,我的工作流程基本上是: