在postgresql数据库中将R代码推送到PL/R代码
标签:r、postgresql、psql、plr
我正在尝试将一段借助 RPostgreSQL 运行成功的 R 代码转换为 PL/R 代码,以避免在 postgresql 数据库中来回推送或拉取数据。代码是对 data.table 执行的 dcast:
# Client-side pivot: pull cli_ranges from PostgreSQL, reshape it from long to
# wide with data.table::dcast(), and write the result back as a new table.

# libs
library(RPostgreSQL)
library(data.table)

# connect
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "postgres", user = "postgres")

# load
cli_ranges <- dbGetQuery(
  con,
  "SELECT custid, prod_ranges, is_cli from cli_ranges;"
)

# DT: convert in place to a data.table keyed by prod_ranges
setDT(cli_ranges)
setkeyv(cli_ranges, c("prod_ranges"))

# pivot: one row per custid, one column per distinct prod_ranges value
# (named is_cli_<value>), each cell holding the sum of is_cli.
# fun.aggregate is spelled out instead of relying on partial matching of `fun`.
cli_ranges.pivoted <- dcast(
  cli_ranges,
  custid ~ paste0("is_cli_", prod_ranges),
  fun.aggregate = sum,
  value.var = "is_cli"
)

# send back to DB (FALSE rather than the reassignable shorthand F)
dbWriteTable(con, "cli_ranges_pivoted", cli_ranges.pivoted, row.names = FALSE)

# release the connection once the table has been written
dbDisconnect(con)
。。。但是R代码的最后一行(dbWriteTable
)抛出:
将 data.table 转换为 data.frame(as.data.frame(cli_ranges.pivoted))
也不起作用
一个技巧是让函数返回 data.table/data.frame,以便执行 CREATE TABLE cli_ranges_pivoted AS SELECT pivot();但是我真的不知道如何将 data.frame 作为输出返回
cli_ranges
表:
custid prod_ranges is_cli
1 A 1
1 B 1
1 C 0
2 A 1
2 B 0
2 C 1
3 A 0
3 B 1
3 C 0
4 A 1
... ... ...
dcast(即旋转)后的数据帧如下:
custid prod_ranges_A prod_ranges_B prod_ranges_C
1 1 1 0
2 1 0 1
3 0 1 0
4 1 ...
...
prod_ranges
中不同值的数量经常变化,因此我无法提前定义透视后的列数
环境:PostgreSQL 9.5、R 3.3、PL/R 08.03.00.16、Windows 10 64 位
您可以从示例 2 和 3 开始
或者,您可以尝试修改 Lukas Eklund 和 Erwin Brandstetter 在这篇帖子中给出的 unnest 函数(我使用的是 Lukas 的解决方案):
然后您可以尝试返回一个数组,并执行如下操作:
-- r_norm(n, mean, std_dev): draw two samples of n normal deviates each and
-- hand them back through the float8[] return type.
CREATE OR REPLACE FUNCTION r_norm(n integer, mean float8, std_dev float8)
RETURNS float8[]
AS $$
  # Two successive rnorm() draws with the same parameters; x is drawn first.
  x <- rnorm(n, mean, std_dev)
  y <- rnorm(n, mean, std_dev)
  # Columns x and y, one row per draw.
  data.frame(x = x, y = y)
$$ LANGUAGE plr;
-- Materialise ten r_norm() draws: each row produced by unnest_multidim() is a
-- two-element array whose elements become col1 and col2.
CREATE TABLE test_norm_var AS
SELECT R_output[1] AS col1,
       R_output[2] AS col2
FROM unnest_multidim(r_norm(10, 0, 1)) AS R_output;

-- Read back the first column.
SELECT col1
FROM test_norm_var;
CREATE OR REPLACE FUNCTION r_norm(n integer, mean float8, std_dev float8)
RETURNS float8[]
AS $$
X
问题是 prod_ranges 的取值数量会随时间变化,也就是说,我无法预先确定 dcast 会产生多少列(dcast 按 prod_ranges 的值对我的表进行透视)。
您能否提供一个 dcast 的输入或输出示例(填写假数字等)?
由…发布的原始问题中提供的说明更适合 Pivotal/Greenplum/Redshift,而不是 postgresql,不过谢谢你的想法!
感谢所有的代码,但我实际上是在寻找一个更“直接”的解决方案。那么我最好使用交叉表(crosstab)。
custid prod_ranges_A prod_ranges_B prod_ranges_C
1 1 1 0
2 1 0 1
3 0 1 0
4 1 ...
...
-- unnest_multidim(anyarray): split a two-dimensional array into one
-- one-dimensional array per first-dimension index.
--
-- Elements inside each returned array are ordered by their second-dimension
-- subscript, and rows are returned in first-dimension order. The explicit
-- ORDER BY clauses make this deterministic instead of depending on the order
-- in which the aggregate happens to receive its input.
-- An empty array, or an array without a second dimension, yields no rows.
CREATE OR REPLACE FUNCTION unnest_multidim(anyarray)
RETURNS SETOF anyarray AS
$BODY$
    SELECT array_agg($1[i][j] ORDER BY j)
    FROM generate_subscripts($1, 1) AS i,
         generate_subscripts($1, 2) AS j
    GROUP BY i
    ORDER BY i
$BODY$
LANGUAGE sql IMMUTABLE;
-- r_norm(n, mean, std_dev): draw two samples of n normal deviates each and
-- hand them back through the float8[] return type.
CREATE OR REPLACE FUNCTION r_norm(n integer, mean float8, std_dev float8)
RETURNS float8[]
AS $$
  # Two successive rnorm() draws with the same parameters; x is drawn first.
  x <- rnorm(n, mean, std_dev)
  y <- rnorm(n, mean, std_dev)
  # Columns x and y, one row per draw.
  data.frame(x = x, y = y)
$$ LANGUAGE plr;
-- Materialise ten r_norm() draws: each row produced by unnest_multidim() is a
-- two-element array whose elements become col1 and col2.
CREATE TABLE test_norm_var AS
SELECT R_output[1] AS col1,
       R_output[2] AS col2
FROM unnest_multidim(r_norm(10, 0, 1)) AS R_output;

-- Read back the first column.
SELECT col1
FROM test_norm_var;
CREATE OR REPLACE FUNCTION pivot()
RETURNS VOID as $$
  # Pivot a small hard-coded customer / product-range data set from long to
  # wide form with data.table::dcast(), then create table cli_ranges and
  # insert the pivoted rows through a separate RPostgreSQL connection.
  # Takes no arguments and returns nothing; its only effect is the new table.
  library(RPostgreSQL)
  library(data.table)

  drv <- dbDriver("PostgreSQL")
  con <- dbConnect(drv, dbname = "postgres")
  # Close the connection however the function exits (success or error).
  on.exit(dbDisconnect(con), add = TRUE)

  # Sample input in long format: one row per (custid, prod_ranges) pair.
  # stringsAsFactors is given explicitly because default.stringsAsFactors()
  # is deprecated in current R; the pivot result is the same either way since
  # prod_ranges is only used through paste0().
  cli_ranges <- data.frame(
    custid = c(1, 1, 1, 2, 2, 2),
    prod_ranges = c("A", "B", "C", "A", "B", "C"),
    is_cli = c(1, 1, 0, 1, 0, 1),
    stringsAsFactors = FALSE
  )
  setDT(cli_ranges)
  setkeyv(cli_ranges, c("prod_ranges"))

  # One row per custid, one is_cli_<value> column per distinct prod_ranges.
  pivoted <- as.data.frame(dcast(
    cli_ranges,
    custid ~ paste0("is_cli_", prod_ranges),
    fun.aggregate = sum,
    value.var = "is_cli"
  ))
  col_names <- colnames(pivoted)

  # Every pivoted column is created as numeric. The Greenplum-only
  # DISTRIBUTED BY clause is omitted because PostgreSQL rejects it.
  # NOTE(review): the target table is named cli_ranges, the same name the
  # long-format source table has elsewhere on this page - confirm the
  # intended name before running against a database that already has it.
  create_query <- paste0(
    "CREATE TABLE cli_ranges (",
    paste0(col_names, " numeric", collapse = ", "),
    ")"
  )
  dbGetQuery(con, create_query)

  # Build "(v11,v12,...),(v21,v22,...)" column-wise instead of growing a
  # string cell by cell; skip the INSERT entirely when there are no rows.
  if (nrow(pivoted) > 0) {
    cells <- lapply(pivoted, function(column) {
      text <- as.character(column)
      # A literal NA is not valid SQL; send NULL instead.
      text[is.na(column)] <- "NULL"
      text
    })
    row_tuples <- do.call(paste, c(unname(cells), sep = ","))
    values_string <- paste0("(", paste(row_tuples, collapse = "),("), ")")
    insert_query <- paste0(
      "INSERT INTO cli_ranges (", paste(col_names, collapse = ", "),
      ") VALUES ", values_string
    )
    dbGetQuery(con, insert_query)
  }

  invisible(NULL)
$$ LANGUAGE plr;