PostgreSQL 与 SQLite 在 Docker 中的全局写锁
PostgreSQL 与 SQLite 在 Docker 中的全局写锁,postgresql,sqlite,docker,flask,Postgresql,Sqlite,Docker,Flask,我有一个 Flask 应用程序,在 Docker 环境中工作,有两个 SQLAlchemy 数据库:Postgres(基于网络)和 SQLite(基于文件)。这是我的 docker compose 配置,在 macOS 中运行 Linux VM:
我有一个 Flask 应用程序,在 Docker 环境中工作,有两个 SQLAlchemy 数据库:Postgres(基于网络)和 SQLite(基于文件)。这是我的 docker compose 配置,在 macOS 中运行 Linux VM:
services:
  web:
    build:
      context: ./services/web
      dockerfile: Dockerfile-dev
    # Source tree is bind-mounted for live reload in development.
    volumes:
      - './services/web:/usr/src/app'
    ports:
      - 5001:5000
    environment:
      - FLASK_ENV=development
      - APP_SETTINGS=project.config.DevelopmentConfig
      - DATABASE_URL=postgres://postgres:postgres@web-db:5432/web_dev
      - DATABASE_TEST_URL=postgres://postgres:postgres@web-db:5432/web_test
      # NOTE(review): this is a SQLAlchemy URL, not a bare file path —
      # anything that feeds it to sqlite3.connect() must strip the scheme.
      - DATABASE_INDEX_URL=sqlite:////usr/src/app/project/db/index/searchindex.db
      - SECRET_KEY=my_precious
    depends_on:
      - web-db
      - redis

  worker-scraping:
    image: dev3_web
    restart: always
    # NOTE(review): both services bind-mount the same host directory, so the
    # SQLite file is shared across containers — a known source of lock issues.
    volumes:
      - ./services/web:/usr/src/app
      - ./services/web/celery_logs:/usr/src/app/celery_logs
    command: celery worker -A celery_worker.celery --loglevel=DEBUG --logfile=celery_logs/worker_scraping.log -Q scraping
    environment:
      - CELERY_BROKER=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
      - FLASK_ENV=development
      - APP_SETTINGS=project.config.DevelopmentConfig
      - DATABASE_URL=postgres://postgres:postgres@web-db:5432/web_dev
      - DATABASE_TEST_URL=postgres://postgres:postgres@web-db:5432/web_test
      - DATABASE_INDEX_URL=sqlite:////usr/src/app/project/db/index/searchindex.db
      - SECRET_KEY=my_precious
    depends_on:
      - web
      - redis
      - web-db
    links:
      - redis:redis
      - web-db:web-db
config.py
class DevelopmentConfig(BaseConfig):
    """Development configuration.

    Every secret and connection string is read from the environment so the
    same image can run unchanged across compose services.
    """
    SECRET_KEY = os.getenv('SECRET_KEY')

    # SQLAlchemy: primary (Postgres) database plus a file-based SQLite index.
    SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL')
    SQLALCHEMY_DATABASE_INDEX_URI = os.getenv('DATABASE_INDEX_URL')
    # NOTE(review): SQLALCHEMY_DATABASE_INDEX_URI holds a full SQLAlchemy URL
    # (sqlite:////path), not a bare file path — confirm all consumers expect that.
    SQLALCHEMY_BINDS = {
        'db1': SQLALCHEMY_DATABASE_URI,
        'db2': SQLALCHEMY_DATABASE_INDEX_URI,
    }

    # Celery wiring; CELERY_IMPORTS makes the worker register the task module.
    CELERY_BROKER_URL = os.getenv('CELERY_BROKER')
    CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND')
    CELERY_IMPORTS = ('project.api.routes.background',)
class crawler:
    """Thin wrapper around a sqlite3 connection used to build the search index.

    Accepts either a plain file path or a SQLAlchemy-style SQLite URL
    (``sqlite:////abs/path`` or ``sqlite:///rel/path``). ``sqlite3.connect``
    only understands file paths, so the scheme prefix is stripped first —
    passing the raw URL is what produced
    ``sqlite3.OperationalError: unable to open database file``.
    """

    _SQLITE_SCHEME = 'sqlite:///'

    def __init__(self, dbname):
        """Open (creating if needed) the SQLite database at *dbname*."""
        # Tolerate a SQLAlchemy URL: 'sqlite:////usr/src/app/x.db' -> '/usr/src/app/x.db'
        if dbname.startswith(self._SQLITE_SCHEME):
            dbname = dbname[len(self._SQLITE_SCHEME):]
        # timeout=10: wait up to 10 s for a competing writer's lock to clear.
        self.con = sqlite3.connect(dbname, timeout=10)

    def __del__(self):
        # `con` may never have been set if connect() raised in __init__.
        con = getattr(self, 'con', None)
        if con is not None:
            con.close()

    def dbcommit(self):
        """Commit the current transaction."""
        self.con.commit()
import os

# third-party libraries
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from celery import Celery
from flask_migrate import Migrate

# Celery app for background jobs; broker address matches the compose service name.
celery = Celery(__name__, broker='redis://redis:6379/0')

# Flask extensions created once at import time, bound to an app in create_app().
migrate = Migrate()


def create_app(script_info=None):
    """Application factory: build, configure and return the Flask app."""
    # Imported lazily to avoid circular imports at module load time.
    from .api.models.base import db
    from .api import routes, models

    flask_app = Flask(__name__)

    # Configuration class path is taken from the APP_SETTINGS env var.
    flask_app.config.from_object(os.getenv('APP_SETTINGS'))

    # Wire up extensions and blueprints.
    migrate.init_app(flask_app, db)
    routes.init_app(flask_app)
    models.init_app(flask_app)

    # Push the Flask config (including CELERY_* keys) into the Celery app.
    celery.conf.update(flask_app.config)

    @flask_app.shell_context_processor
    def ctx():
        # Objects pre-loaded into `flask shell`.
        return {'app': flask_app, 'db': db}

    return flask_app
我的 SQLite 连接应该通过 Celery 后台进程建立,如下所示:
@celery.task(
    queue='scraping',
    default_retry_delay=30,
    max_retries=3,
    soft_time_limit=100000)
def scrape_allblogs():
    """Celery task: (re)build the SQLite search index over every blog page.

    Run with: celery worker -A celery_worker.celery --loglevel=INFO -Q scraping
    """
    # NOTE(review): SQLALCHEMY_DATABASE_INDEX_URI is a SQLAlchemy URL
    # (sqlite:////path) while crawler/sqlite3 expects a bare file path —
    # likely the source of "unable to open database file". Confirm.
    blog_crawler = crawler(current_app.config.get('SQLALCHEMY_DATABASE_INDEX_URI'))
    blog_crawler.create_index_tables()  # DONE once
    for page_list in all_pagelists:
        blog_crawler.crawl([page_list])
    return {'Status': 'Blogs indexing updated!'}
上述创建和填充数据库的方法是如何定义的:
searchengine.py
class DevelopmentConfig(BaseConfig):
    """Development configuration.

    Every secret and connection string is read from the environment so the
    same image can run unchanged across compose services.
    """
    SECRET_KEY = os.getenv('SECRET_KEY')

    # SQLAlchemy: primary (Postgres) database plus a file-based SQLite index.
    SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL')
    SQLALCHEMY_DATABASE_INDEX_URI = os.getenv('DATABASE_INDEX_URL')
    # NOTE(review): SQLALCHEMY_DATABASE_INDEX_URI holds a full SQLAlchemy URL
    # (sqlite:////path), not a bare file path — confirm all consumers expect that.
    SQLALCHEMY_BINDS = {
        'db1': SQLALCHEMY_DATABASE_URI,
        'db2': SQLALCHEMY_DATABASE_INDEX_URI,
    }

    # Celery wiring; CELERY_IMPORTS makes the worker register the task module.
    CELERY_BROKER_URL = os.getenv('CELERY_BROKER')
    CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND')
    CELERY_IMPORTS = ('project.api.routes.background',)
class crawler:
    """Thin wrapper around a sqlite3 connection used to build the search index.

    Accepts either a plain file path or a SQLAlchemy-style SQLite URL
    (``sqlite:////abs/path`` or ``sqlite:///rel/path``). ``sqlite3.connect``
    only understands file paths, so the scheme prefix is stripped first —
    passing the raw URL is what produced
    ``sqlite3.OperationalError: unable to open database file``.
    """

    _SQLITE_SCHEME = 'sqlite:///'

    def __init__(self, dbname):
        """Open (creating if needed) the SQLite database at *dbname*."""
        # Tolerate a SQLAlchemy URL: 'sqlite:////usr/src/app/x.db' -> '/usr/src/app/x.db'
        if dbname.startswith(self._SQLITE_SCHEME):
            dbname = dbname[len(self._SQLITE_SCHEME):]
        # timeout=10: wait up to 10 s for a competing writer's lock to clear.
        self.con = sqlite3.connect(dbname, timeout=10)

    def __del__(self):
        # `con` may never have been set if connect() raised in __init__.
        con = getattr(self, 'con', None)
        if con is not None:
            con.close()

    def dbcommit(self):
        """Commit the current transaction."""
        self.con.commit()
import os

# third-party libraries
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from celery import Celery
from flask_migrate import Migrate

# Celery app for background jobs; broker address matches the compose service name.
celery = Celery(__name__, broker='redis://redis:6379/0')

# Flask extensions created once at import time, bound to an app in create_app().
migrate = Migrate()


def create_app(script_info=None):
    """Application factory: build, configure and return the Flask app."""
    # Imported lazily to avoid circular imports at module load time.
    from .api.models.base import db
    from .api import routes, models

    flask_app = Flask(__name__)

    # Configuration class path is taken from the APP_SETTINGS env var.
    flask_app.config.from_object(os.getenv('APP_SETTINGS'))

    # Wire up extensions and blueprints.
    migrate.init_app(flask_app, db)
    routes.init_app(flask_app)
    models.init_app(flask_app)

    # Push the Flask config (including CELERY_* keys) into the Celery app.
    celery.conf.update(flask_app.config)

    @flask_app.shell_context_processor
    def ctx():
        # Objects pre-loaded into `flask shell`.
        return {'app': flask_app, 'db': db}

    return flask_app
最后,这是我初始化应用程序的方式:
__init__.py
class DevelopmentConfig(BaseConfig):
    """Development configuration.

    Every secret and connection string is read from the environment so the
    same image can run unchanged across compose services.
    """
    SECRET_KEY = os.getenv('SECRET_KEY')

    # SQLAlchemy: primary (Postgres) database plus a file-based SQLite index.
    SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL')
    SQLALCHEMY_DATABASE_INDEX_URI = os.getenv('DATABASE_INDEX_URL')
    # NOTE(review): SQLALCHEMY_DATABASE_INDEX_URI holds a full SQLAlchemy URL
    # (sqlite:////path), not a bare file path — confirm all consumers expect that.
    SQLALCHEMY_BINDS = {
        'db1': SQLALCHEMY_DATABASE_URI,
        'db2': SQLALCHEMY_DATABASE_INDEX_URI,
    }

    # Celery wiring; CELERY_IMPORTS makes the worker register the task module.
    CELERY_BROKER_URL = os.getenv('CELERY_BROKER')
    CELERY_RESULT_BACKEND = os.getenv('CELERY_RESULT_BACKEND')
    CELERY_IMPORTS = ('project.api.routes.background',)
class crawler:
    """Thin wrapper around a sqlite3 connection used to build the search index.

    Accepts either a plain file path or a SQLAlchemy-style SQLite URL
    (``sqlite:////abs/path`` or ``sqlite:///rel/path``). ``sqlite3.connect``
    only understands file paths, so the scheme prefix is stripped first —
    passing the raw URL is what produced
    ``sqlite3.OperationalError: unable to open database file``.
    """

    _SQLITE_SCHEME = 'sqlite:///'

    def __init__(self, dbname):
        """Open (creating if needed) the SQLite database at *dbname*."""
        # Tolerate a SQLAlchemy URL: 'sqlite:////usr/src/app/x.db' -> '/usr/src/app/x.db'
        if dbname.startswith(self._SQLITE_SCHEME):
            dbname = dbname[len(self._SQLITE_SCHEME):]
        # timeout=10: wait up to 10 s for a competing writer's lock to clear.
        self.con = sqlite3.connect(dbname, timeout=10)

    def __del__(self):
        # `con` may never have been set if connect() raised in __init__.
        con = getattr(self, 'con', None)
        if con is not None:
            con.close()

    def dbcommit(self):
        """Commit the current transaction."""
        self.con.commit()
import os

# third-party libraries
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from celery import Celery
from flask_migrate import Migrate

# Celery app for background jobs; broker address matches the compose service name.
celery = Celery(__name__, broker='redis://redis:6379/0')

# Flask extensions created once at import time, bound to an app in create_app().
migrate = Migrate()


def create_app(script_info=None):
    """Application factory: build, configure and return the Flask app."""
    # Imported lazily to avoid circular imports at module load time.
    from .api.models.base import db
    from .api import routes, models

    flask_app = Flask(__name__)

    # Configuration class path is taken from the APP_SETTINGS env var.
    flask_app.config.from_object(os.getenv('APP_SETTINGS'))

    # Wire up extensions and blueprints.
    migrate.init_app(flask_app, db)
    routes.init_app(flask_app)
    models.init_app(flask_app)

    # Push the Flask config (including CELERY_* keys) into the Celery app.
    celery.conf.update(flask_app.config)

    @flask_app.shell_context_processor
    def ctx():
        # Objects pre-loaded into `flask shell`.
        return {'app': flask_app, 'db': db}

    return flask_app
尽管在 config.py 中声明了 db1 和 db2,但到目前为止,我只将模型绑定到我的 Postgres 数据库(db1),因为在这个开发阶段我只需要(通过上面的爬虫)访问 db2:
class User(db.Model):
    """User record, stored in the primary (Postgres) database via bind 'db1'."""
    __tablename__ = 'users'
    __bind_key__ = 'db1'
当我启动容器时,会在正确的路径上创建一个空的 searchindex.db 文件,但我得到以下错误:
[2020-02-17 18:55:47,033: DEBUG/MainProcess] pidbox received method enable_events() [reply_to:None ticket:None]
[2020-02-17 18:55:47,062: ERROR/ForkPoolWorker-1] Task project.api.routes.background.scrape_allblogs[6b02fe39-a51c-4a5d-8ee8-13af3a15180e] raised unexpected: OperationalError('unable to open database file')
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/celery/app/trace.py", line 385, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/celery/app/trace.py", line 648, in __protected_call__
return self.run(*args, **kwargs)
File "/usr/src/app/project/api/routes/background.py", line 905, in scrape_allblogs
blog_crawler = crawler(current_app.config.get('SQLALCHEMY_DATABASE_INDEX_URI'))
File "/usr/src/app/project/api/classifiers/nlp/searchengine.py", line 201, in __init__
timeout=10)
sqlite3.OperationalError: unable to open database file
我错过了什么?如何解锁我的sqlite数据库?如果需要在两个容器之间共享数据库,第二个PostgreSQL数据库将比尝试共享sqlite数据库更好。然后,您可以使用普通的网络数据库I/O,而不是尝试在容器和服务器之间共享文件。如果你想在Docker Swarm或Kubernetes这样的分布式环境中使用这两个容器,而这两个容器可能不在同一个物理节点上运行,那么这一点就更加正确了。非常感谢链接。没有两个进程会写入sqlite,但是,只会从中读取。写入此sqlite将是一项周期性任务,仅用于页面排名目的,其他容器只能从中读取。你认为把所有内容都改成postgres值得吗?@DavidMaze你怎么看?避免在微服务之间共享文件系统内容仍然是最佳做法。我的分布式集群评论仍然适用。这一点在一些地方也有讨论,例如,即使该站点完全不是Docker-specific.Yes。将基于Docker(或基于Kubernetes)的集群设置与非Docker数据库(可能是云托管的数据库,如AWS RDS)混合使用是完全合理的,因为数据库的状态和生命周期要求与应用程序容器非常不同。