Azure Cosmos 插入无法有效地并行化

Azure Cosmos 插入无法有效地并行化,azure,azure-cosmosdb,Azure,Azure Cosmosdb,元问题: 我们正在从EventHub中提取数据,运行一些逻辑,并将其保存到cosmos。目前,Cosmos插入是我们的瓶颈。我们如何最大限度地提高吞吐量? 详细信息 我们正在尝试优化Cosmos吞吐量,SDK中似乎存在一些争用,使得并行插入只比串行插入略快一点。 从逻辑上讲,我们正在做: for (int i = 0; i < insertCount; i++) { taskList.Add(InsertCosm

元问题:
我们正在从EventHub中提取数据,运行一些逻辑,并将其保存到cosmos。目前,Cosmos插入是我们的瓶颈。我们如何最大限度地提高吞吐量?

详细信息
我们正在尝试优化Cosmos吞吐量,SDK中似乎存在一些争用,使得并行插入只比串行插入略快一点。
从逻辑上讲,我们正在做:

            // Start every insert without awaiting it, then wait for the whole batch at once.
            for (int started = 0; started < insertCount; started++)
            {
                var pendingInsert = InsertCosmos(sdkContainerClient);
                taskList.Add(pendingInsert);
            }
            var parallelTimes = await Task.WhenAll(taskList);
  • 串行的结果很直观(只是把每次的耗时相加)
  • 不使用Cosmos的情况(最后一组计时)也很直观(总耗时就是单次的最小耗时)
  • 但Cosmos并行插入的并行效果并不理想,这表明存在某种争用
我们在Azure中的虚拟机(与Cosmos相同的数据中心)上运行此功能,有足够的RU,因此没有429,并使用Microsoft.Azure.Cosmos 3.2.0

完整代码示例

    /// <summary>
    /// Console benchmark that times the same number of inserts three ways: awaited one
    /// at a time against Cosmos, started together against Cosmos, and started together
    /// against <c>Task.Delay(20)</c> as a baseline that makes no Cosmos call.
    /// </summary>
    class Program
    {
        /// <summary>Process entry point; blocks until the benchmark has finished.</summary>
        public static void Main(string[] args)
        {
            // GetAwaiter().GetResult() rethrows the original exception; Wait() would wrap
            // it in an AggregateException and hide the real failure one level down.
            CosmosWriteTest().GetAwaiter().GetResult();
        }

        /// <summary>
        /// Runs the serial, parallel and delay-only passes and prints the total and the
        /// per-operation elapsed milliseconds of each pass to the console.
        /// </summary>
        public static async Task CosmosWriteTest()
        {
            // CosmosClient implements IDisposable, so it is disposed once the benchmark ends.
            using (var cosmosClient = new CosmosClient("todo", new CosmosClientOptions { ConnectionMode = ConnectionMode.Direct }))
            {
                var database = cosmosClient.GetDatabase("<ourcontainer>");
                var sdkContainerClient = database.GetContainer("<ourcontainer>");
                int insertCount = 25;
                //Warmup
                await sdkContainerClient.CreateItemAsync(new TestObject());

                //---Serially inserts into Cosmos---
                List<long> serialTimes = new List<long>();
                var serialTimer = Stopwatch.StartNew();
                Console.WriteLine("Cosmos Serial");
                for (int i = 0; i < insertCount; i++)
                {
                    // Each insert is awaited before the next one starts.
                    serialTimes.Add(await InsertCosmos(sdkContainerClient));
                }
                serialTimer.Stop();
                Console.WriteLine($"Serial took: {serialTimer.ElapsedMilliseconds}ms for {insertCount}");
                Console.WriteLine($" - Individual times {string.Join(",", serialTimes)}");

                //---Parallel inserts into Cosmos---
                Console.WriteLine(Environment.NewLine + "Cosmos Parallel");
                var parallelTimer = Stopwatch.StartNew();
                var taskList = new List<Task<long>>();
                for (int i = 0; i < insertCount; i++)
                {
                    // Not awaited here: all inserts are started before any is awaited.
                    taskList.Add(InsertCosmos(sdkContainerClient));
                }
                var parallelTimes = await Task.WhenAll(taskList);

                parallelTimer.Stop();
                Console.WriteLine($"Parallel took: {parallelTimer.ElapsedMilliseconds}ms for {insertCount}");
                Console.WriteLine($" - Individual times {string.Join(",", parallelTimes)}");

                //---Testing parallelism minus cosmos---
                Console.WriteLine(Environment.NewLine + "Just Parallel (no cosmos)");
                var justParallelTimer = Stopwatch.StartNew();
                var noCosmosTaskList = new List<Task<long>>();
                for (int i = 0; i < insertCount; i++)
                {
                    noCosmosTaskList.Add(InsertCosmos(sdkContainerClient, true));
                }
                var justParallelTimes = await Task.WhenAll(noCosmosTaskList);

                justParallelTimer.Stop();
                Console.WriteLine($"Parallel took: {justParallelTimer.ElapsedMilliseconds}ms for {insertCount}");
                Console.WriteLine($" - Individual times {string.Join(",", justParallelTimes)}");
            }
        }

        /// <summary>
        /// Times a single operation: a Cosmos item creation, or a 20 ms delay when
        /// <paramref name="justDelay"/> is true.
        /// </summary>
        /// <param name="sdkContainerClient">Container the test item is created in.</param>
        /// <param name="justDelay">When true, skips Cosmos and awaits Task.Delay(20) instead.</param>
        /// <returns>Elapsed milliseconds for the single operation.</returns>
        private static async Task<long> InsertCosmos(Container sdkContainerClient, bool justDelay = false)
        {
            var timer = Stopwatch.StartNew();
            if (!justDelay)
            {
                await sdkContainerClient.CreateItemAsync(new TestObject());
            }
            else
            {
                await Task.Delay(20);
            }

            timer.Stop();
            return timer.ElapsedMilliseconds;
        }

        //Test object to save to Cosmos
        public class TestObject
        {
            // id and pKey each default to a fresh GUID string, so every instance is distinct.
            public string id { get; set; } = Guid.NewGuid().ToString();
            public string pKey { get; set; } = Guid.NewGuid().ToString();
            public string Field1 { get; set; } = "Testing this field";
            public double Number { get; set; } = 12345;
        }
    }
类程序
{
公共静态void Main(字符串[]args)
{
CosmosWriteTest().Wait();
}
公共静态异步任务CosmosWriteTest()
{
var cosmosClient=new cosmosClient(“todo”,new CosmosClientOptions{ConnectionMode=ConnectionMode.Direct});
var database=cosmosClient.GetDatabase(“”);
var sdkContainerClient=database.GetContainer(“”);
int insertCount=25;
//热身
等待sdkContainerClient.CreateItemAsync(新的TestObject());
//---连续插入宇宙---
List serialTimes=新列表();
var serialTimer=Stopwatch.StartNew();
Console.WriteLine(“宇宙系列”);
for(int i=0;i
这是引入批量的场景。批量模式目前处于预览状态,可在软件包中使用

利用批量需要做的是打开
AllowBulkExecution
标志:

new CosmosClient(endpoint, authKey, new CosmosClientOptions() { AllowBulkExecution = true })

创建此模式是为了使您描述的场景受益,即需要吞吐量的并发操作列表

我们这里有一个示例项目:

我们仍在编写官方文档,但其思路是:当发出并发操作时,SDK不会像您现在看到的那样将它们作为单独的请求执行,而是根据分区关联将它们分组,并作为分组(批处理)操作执行,从而减少后端服务调用,并根据操作量可能将吞吐量提高50%-100%。此模式会消耗更多的RU/s,因为它每秒推送的操作量高于逐个发出操作时的量(因此,如果遇到429响应,则表示瓶颈现在位于已预配的RU/s上)

var-cosmosClient=new-cosmosClient(“todo”,new-CosmosClientOptions{AllowBulkExecution=true});
var database=cosmosClient.GetDatabase(“”);
var sdkContainerClient=database.GetContainer(“”);
//操作越多越好,与非批量操作相比,仅25次可能不会产生很大的差异
int insertCount=10000;
//不要做任何热身
列表操作=新建列表();
var timer=Stopwatch.StartNew();
for(int i=0;i
    /// <summary>
    /// Console benchmark that times the same number of inserts three ways: awaited one
    /// at a time against Cosmos, started together against Cosmos, and started together
    /// against <c>Task.Delay(20)</c> as a baseline that makes no Cosmos call.
    /// </summary>
    class Program
    {
        /// <summary>Process entry point; blocks until the benchmark has finished.</summary>
        public static void Main(string[] args)
        {
            // GetAwaiter().GetResult() rethrows the original exception; Wait() would wrap
            // it in an AggregateException and hide the real failure one level down.
            CosmosWriteTest().GetAwaiter().GetResult();
        }

        /// <summary>
        /// Runs the serial, parallel and delay-only passes and prints the total and the
        /// per-operation elapsed milliseconds of each pass to the console.
        /// </summary>
        public static async Task CosmosWriteTest()
        {
            // CosmosClient implements IDisposable, so it is disposed once the benchmark ends.
            using (var cosmosClient = new CosmosClient("todo", new CosmosClientOptions { ConnectionMode = ConnectionMode.Direct }))
            {
                var database = cosmosClient.GetDatabase("<ourcontainer>");
                var sdkContainerClient = database.GetContainer("<ourcontainer>");
                int insertCount = 25;
                //Warmup
                await sdkContainerClient.CreateItemAsync(new TestObject());

                //---Serially inserts into Cosmos---
                List<long> serialTimes = new List<long>();
                var serialTimer = Stopwatch.StartNew();
                Console.WriteLine("Cosmos Serial");
                for (int i = 0; i < insertCount; i++)
                {
                    // Each insert is awaited before the next one starts.
                    serialTimes.Add(await InsertCosmos(sdkContainerClient));
                }
                serialTimer.Stop();
                Console.WriteLine($"Serial took: {serialTimer.ElapsedMilliseconds}ms for {insertCount}");
                Console.WriteLine($" - Individual times {string.Join(",", serialTimes)}");

                //---Parallel inserts into Cosmos---
                Console.WriteLine(Environment.NewLine + "Cosmos Parallel");
                var parallelTimer = Stopwatch.StartNew();
                var taskList = new List<Task<long>>();
                for (int i = 0; i < insertCount; i++)
                {
                    // Not awaited here: all inserts are started before any is awaited.
                    taskList.Add(InsertCosmos(sdkContainerClient));
                }
                var parallelTimes = await Task.WhenAll(taskList);

                parallelTimer.Stop();
                Console.WriteLine($"Parallel took: {parallelTimer.ElapsedMilliseconds}ms for {insertCount}");
                Console.WriteLine($" - Individual times {string.Join(",", parallelTimes)}");

                //---Testing parallelism minus cosmos---
                Console.WriteLine(Environment.NewLine + "Just Parallel (no cosmos)");
                var justParallelTimer = Stopwatch.StartNew();
                var noCosmosTaskList = new List<Task<long>>();
                for (int i = 0; i < insertCount; i++)
                {
                    noCosmosTaskList.Add(InsertCosmos(sdkContainerClient, true));
                }
                var justParallelTimes = await Task.WhenAll(noCosmosTaskList);

                justParallelTimer.Stop();
                Console.WriteLine($"Parallel took: {justParallelTimer.ElapsedMilliseconds}ms for {insertCount}");
                Console.WriteLine($" - Individual times {string.Join(",", justParallelTimes)}");
            }
        }

        /// <summary>
        /// Times a single operation: a Cosmos item creation, or a 20 ms delay when
        /// <paramref name="justDelay"/> is true.
        /// </summary>
        /// <param name="sdkContainerClient">Container the test item is created in.</param>
        /// <param name="justDelay">When true, skips Cosmos and awaits Task.Delay(20) instead.</param>
        /// <returns>Elapsed milliseconds for the single operation.</returns>
        private static async Task<long> InsertCosmos(Container sdkContainerClient, bool justDelay = false)
        {
            var timer = Stopwatch.StartNew();
            if (!justDelay)
            {
                await sdkContainerClient.CreateItemAsync(new TestObject());
            }
            else
            {
                await Task.Delay(20);
            }

            timer.Stop();
            return timer.ElapsedMilliseconds;
        }

        //Test object to save to Cosmos
        public class TestObject
        {
            // id and pKey each default to a fresh GUID string, so every instance is distinct.
            public string id { get; set; } = Guid.NewGuid().ToString();
            public string pKey { get; set; } = Guid.NewGuid().ToString();
            public string Field1 { get; set; } = "Testing this field";
            public double Number { get; set; } = 12345;
        }
    }
// Same insert test with the client's AllowBulkExecution option switched on.
var cosmosClient = new CosmosClient("todo", new CosmosClientOptions { AllowBulkExecution = true });
var database = cosmosClient.GetDatabase("<ourcontainer>");
var sdkContainerClient = database.GetContainer("<ourcontainer>");
//The more operations the better, just 25 might not yield a great difference vs non bulk
int insertCount = 10000;
//Don't do any warmup

// The element type is Task; the list is presized because the final count is known.
List<Task> operations = new List<Task>(insertCount);
var timer = Stopwatch.StartNew();
for (int i = 0; i < insertCount; i++)
{
    // Not awaited here: every create is started first, then awaited together below.
    operations.Add(sdkContainerClient.CreateItemAsync(new TestObject()));
}

await Task.WhenAll(operations);
// Stop the stopwatch started above (this snippet declares no other timer).
timer.Stop();