Python 如何建立一种混合的方法来计算过去X天的记录数?
我有一个乒乓球比赛表,我想创建一个查询字段,统计在当前(焦点)比赛日期之前指定天数范围内已进行的比赛数量(标签:python、mysql、sqlalchemy)。例如:
+----------+------------+---------------------------+
| match_id | date | num_matches within 3 days |
+----------+------------+---------------------------+
| 1 | 01/01/2020 | 0 |
| 2 | 02/01/2020 | 1 |
| 3 | 03/01/2020 | 2 |
| 4 | 05/01/2020 | 2 |
| 5 | 05/01/2020 | 3 |
| 6 | 10/01/2020 | 0 |
+----------+------------+---------------------------+
我已尝试使用 SQLAlchemy 文档中所述的相关子查询(correlated subquery),但我不知道如何写出所需的条件:
@hybrid_method
def match_count(self, timespan_days):
    # Asker's first draft of the instance-level count; not runnable as written.
    # Lower bound of the window: this row's timestamp minus `timespan_days` days.
    cut_off = self.date_time_inferred - timedelta(days=timespan_days)
    # NOTE: the `<...>` token below is a placeholder the asker left for the
    # still-missing "earlier than the current match" condition; it is not
    # valid Python.
    return sum(x >= cut_off and <some_cond_for_less_than_current_match_date> for x in self.date_time_inferred)
@match_count.expression
def match_count(cls, timespan_days):
    # Asker's first draft of the SQL-level count; not runnable as written.
    # Lower bound of the window, built as a column expression.
    cut_off = cls.date_time_inferred - timedelta(days=timespan_days)
    return (
        select(func.count(cls.date_time_inferred)).
        where(
            and_(
                cls.date_time_inferred.__ge__(cut_off),
                # NOTE: `<...>` is a placeholder left by the asker for the
                # upper bound (the current match's date); not valid Python.
                cls.date_time_inferred.__lt__(<not_sure_what_goes_here>),
            )
        ).label('match_count')
    )
运行时报错,错误与下面这段代码有关:
select(func.count(cls.date))
所以我显然做错了一大堆事情
希望有人能帮助我了解如何解决这个问题?也非常愿意用不同的方式来完成同样的事情。我想继续使用混合属性…下面是(几乎)完整的代码片段:
# ... omitted import statements and session configuration
def _date(date_str):
return datetime.strptime(date_str, "%Y-%m-%d")
class Match(Base):
    """One match, stored as a row of the ``match`` table."""

    __tablename__ = "match"

    match_id = Column(Integer, primary_key=True)
    date = Column(Date, nullable=False)

    @hybrid_method
    def match_count(self, timespan_days):
        """Count earlier matches played within ``timespan_days`` days.

        Instance-level variant: issues one COUNT query through the session
        this object is attached to.
        """
        earliest_date = self.date - timedelta(days=timespan_days)
        # Same-day ties are resolved by primary key: only rows with a smaller
        # match_id are counted. Using `!=` instead of `<` would also count
        # same-day matches that have a larger id.
        window = (
            Match.match_id < self.match_id,
            Match.date <= self.date,
            Match.date >= earliest_date,
        )
        return object_session(self).query(Match).filter(*window).count()

    @match_count.expression
    def match_count(cls, timespan_days):
        """Build the same count as a labelled scalar subquery.

        Class-level variant: the subquery references the outer ``match`` row
        through ``cls`` and counts rows of an alias named ``other``.
        """
        other = aliased(Match, name="other")
        # The day count is subtracted from the date column as a plain value.
        # NOTE(review): the sample run used PostgreSQL; confirm other backends
        # interpret `date - integer` as a day offset.
        earliest_date = cls.date - timespan_days
        counted = select([func.count(other.match_id)])
        # Same tie-break as the instance-level variant (smaller match_id only).
        for condition in (
            other.match_id < cls.match_id,
            other.date <= cls.date,
            other.date >= earliest_date,
        ):
            counted = counted.where(condition)
        return counted.label("match_count")
def test():
    """Rebuild the schema, insert six sample matches and print both counts.

    The counts are printed twice: once through the instance-level hybrid
    (one query per object) and once through the SQL-level expression
    (a single query).
    """
    Base.metadata.drop_all()
    Base.metadata.create_all()

    from sys import version_info as py_version
    from sqlalchemy import __version__ as sa_version

    separator = "=" * 80

    print(f"PY version={py_version}")
    print(f"SA version={sa_version}")
    print(f"SA engine={engine.name}")
    print(separator)

    # 1. test data
    sample_days = (
        "2020-01-01",
        "2020-01-02",
        "2020-01-03",
        "2020-01-05",
        "2020-01-05",
        "2020-01-10",
    )
    matches = [Match(date=_date(day)) for day in sample_days]
    session.add_all(matches)
    session.commit()
    print(separator)

    # 2. instance-level ("in-memory") evaluation
    for loaded in session.query(Match):
        print(loaded, loaded.match_count(3))
    print(separator)

    # 3. SQL-level evaluation; detach the loaded objects first
    session.expunge_all()
    rows = session.query(Match, Match.match_count(3))
    for row_match, earlier_count in rows:
        print(row_match, earlier_count)
    print(separator)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
需要指出的一点是,“内存中”(实例级)的检查效率不高,因为必须为每个
Match
实例单独查询一次数据库。因此,如果可能,我会使用最后一个(SQL 表达式)查询。您的链接指向 1.4 文档。您是否正在使用 1.4(目前处于测试阶段)?如果没有,请使用您正在使用的版本的文档:1.4 包含重大更改。我正在使用 1.3——感谢提醒——我会更正链接。看起来相关子查询在 1.3 和 1.4 中都是相同的。如果仔细观察,你会发现它在 1.4 中是 select(…)
,但在1.3中是select([…])
(因此你会看到错误消息)。啊!这就解决了我问题的第一部分——谢谢 :) 关于如何走出我自己陷入的困境,有什么提示吗?我不知道如何引用当前比赛的日期,以便写出“小于”条件……啊,当然——使用别名再计数 :) 非常感谢!明天我们会测试这个!嘿,我一直在努力解决这个问题。在 sess = object_session(self) 中调用的 object_session
方法是什么?这大概是为了执行查询而获取会话的一种方式吧?我通常有一个数据访问层类来创建会话
,我用它来生成查询。现在我是从表类内部进行查询,我应该如何调整做法?我遇到了上述问题。我已将其作为一个新问题发布,因为我的直觉是,这可能是 MySQL 与 PostgreSQL 之间的差异……我后来弄清楚了 object_session 是什么
。对于其他阅读本文的人-
# ... omitted import statements and session configuration
def _date(date_str):
return datetime.strptime(date_str, "%Y-%m-%d")
class Match(Base):
    """One match, stored as a row of the ``match`` table."""

    __tablename__ = "match"

    match_id = Column(Integer, primary_key=True)
    date = Column(Date, nullable=False)

    @hybrid_method
    def match_count(self, timespan_days):
        """Count earlier matches played within ``timespan_days`` days.

        Instance-level variant: issues one COUNT query through the session
        this object is attached to.
        """
        earliest_date = self.date - timedelta(days=timespan_days)
        # Same-day ties are resolved by primary key: only rows with a smaller
        # match_id are counted. Using `!=` instead of `<` would also count
        # same-day matches that have a larger id.
        window = (
            Match.match_id < self.match_id,
            Match.date <= self.date,
            Match.date >= earliest_date,
        )
        return object_session(self).query(Match).filter(*window).count()

    @match_count.expression
    def match_count(cls, timespan_days):
        """Build the same count as a labelled scalar subquery.

        Class-level variant: the subquery references the outer ``match`` row
        through ``cls`` and counts rows of an alias named ``other``.
        """
        other = aliased(Match, name="other")
        # The day count is subtracted from the date column as a plain value.
        # NOTE(review): the sample run used PostgreSQL; confirm other backends
        # interpret `date - integer` as a day offset.
        earliest_date = cls.date - timespan_days
        counted = select([func.count(other.match_id)])
        # Same tie-break as the instance-level variant (smaller match_id only).
        for condition in (
            other.match_id < cls.match_id,
            other.date <= cls.date,
            other.date >= earliest_date,
        ):
            counted = counted.where(condition)
        return counted.label("match_count")
def test():
    """Rebuild the schema, insert six sample matches and print both counts.

    The counts are printed twice: once through the instance-level hybrid
    (one query per object) and once through the SQL-level expression
    (a single query).
    """
    Base.metadata.drop_all()
    Base.metadata.create_all()

    from sys import version_info as py_version
    from sqlalchemy import __version__ as sa_version

    separator = "=" * 80

    print(f"PY version={py_version}")
    print(f"SA version={sa_version}")
    print(f"SA engine={engine.name}")
    print(separator)

    # 1. test data
    sample_days = (
        "2020-01-01",
        "2020-01-02",
        "2020-01-03",
        "2020-01-05",
        "2020-01-05",
        "2020-01-10",
    )
    matches = [Match(date=_date(day)) for day in sample_days]
    session.add_all(matches)
    session.commit()
    print(separator)

    # 2. instance-level ("in-memory") evaluation
    for loaded in session.query(Match):
        print(loaded, loaded.match_count(3))
    print(separator)

    # 3. SQL-level evaluation; detach the loaded objects first
    session.expunge_all()
    rows = session.query(Match, Match.match_count(3))
    for row_match, earlier_count in rows:
        print(row_match, earlier_count)
    print(separator)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
============================================================
PY version=sys.version_info(major=3, minor=8, micro=1, releaselevel='final', serial=0)
SA version=1.3.20
SA engine=postgresql
============================================================
<Match(date=datetime.date(2020, 1, 1), match_id=1)> 0
<Match(date=datetime.date(2020, 1, 2), match_id=2)> 1
<Match(date=datetime.date(2020, 1, 3), match_id=3)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=4)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=5)> 3
<Match(date=datetime.date(2020, 1, 10), match_id=6)> 0
============================================================
<Match(date=datetime.date(2020, 1, 1), match_id=1)> 0
<Match(date=datetime.date(2020, 1, 2), match_id=2)> 1
<Match(date=datetime.date(2020, 1, 3), match_id=3)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=4)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=5)> 3
<Match(date=datetime.date(2020, 1, 10), match_id=6)> 0
============================================================
-- Statement emitted for the SQL-level query: one row per match plus a
-- correlated COUNT over the same table aliased as "other".
SELECT match.match_id,
match.date,
-- Counts rows of "other" with a smaller id whose date lies between the outer
-- row's date minus the bound parameter date_1 and the outer row's date.
(SELECT count(other.match_id) AS count_1
FROM match AS other
WHERE other.match_id < match.match_id
AND other.date <= match.date
AND other.date >= match.date - %(date_1)s) AS match_count
FROM match