Google app engine 使用JPA的Google应用程序引擎中的百万用户扇出
我试图用JPA解决GAE的百万扇出问题。如果我理解正确,我应该为Twitter之类的东西提供以下实体(只是一个示例):Google app engine 使用JPA的Google应用程序引擎中的百万用户扇出,google-app-engine,jpa,google-cloud-datastore,Google App Engine,Jpa,Google Cloud Datastore,我试图用JPA解决GAE的百万扇出问题。如果我理解正确,我应该为Twitter之类的东西提供以下实体(只是一个示例): 公共用户{ @Id密钥Id; 字符串名; 字符串显示名; 列出订阅者;//用户 } 公共推特{ @Id密钥Id; 用户tweetMaker; 字符串消息; } 公共推文索引{ @Id密钥Id; 关键字tweetMaker;//用户 列出订阅者;//用户 } 发出tweet时,保存tweet对象,并保存TweetIndex,其中tweetMaker是发出tweet的用户,订阅者
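public class User {
    @Id Key id;
    String name;
    String displayName;
    List<Key> subscribers; // Users
}
public class Tweet {
    @Id Key id;
    User tweetMaker;
    String message;
}
public class TweetIndex {
    @Id Key id;
    Key tweetMaker; // User
    List<Key> subscribers; // Users
}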
发出推文时,先保存 Tweet 对象,再保存一个 TweetIndex:其中 tweetMaker 是发出该推文的用户,订阅者列表则从 User 对象复制到 TweetIndex 中。之后,我会按订阅者查询 TweetIndex,以获取某个特定订阅者应收到的消息。
/**
 * Example entity describing one user account.
 *
 * Subscriber data is denormalised into {@link UserSubscriberChildEntity}
 * children so a profile page can list followers without a join.
 */
public class UserEntity {
/** Identifier of this user. */
@Id Key id;
/** The user's name. */
String name;
/** INDEXED : allows retrieving a user by display name. */
String displayName;
/** Tweet counter kept on the user; incremented by the example service when a tweet is created. */
int tweetCount;
/**
 * USE CASE : show a user's followers on his "profile" page.
 *
 * The data of each subscribing user is duplicated into a
 * UserSubscriberChildEntity, so the followers can be read straight from
 * this entity. According to the author, an ancestor query on
 * UserSubscriberChildEntity using this user's id is enough to load them.
 */
List<UserSubscriberChildEntity> subscribers;
}
/**
 * Denormalised copy of a subscribing user's data, stored as a child of the
 * user being followed so that it can be retrieved with an ancestor query.
 */
public class UserSubscriberChildEntity {
/** The id of this child entity. */
@Id Key subscriberId;
/** Copy of the subscriber's UserEntity name. */
String name;
/** Copy of the subscriber's UserEntity display name. */
String displayName;
/**
 * The id of the UserEntity this record refers to.
 * NOTE(review): declared as String while UserEntity.id is a Key — confirm
 * which representation callers expect.
 */
String userId;
}
/**
 * Example entity describing one tweet, modelled after what has to be shown
 * when the tweet is displayed (author data is duplicated on purpose).
 */
public class TweetEntity {
/** Identifier of this tweet. */
@Id Key id;
/**
 * The actual text message.
 */
String tweetContent;
/**
 * USE CASE : display the tweet maker's name alongside the tweet content.
 *
 * The author's name fields are duplicated here to avoid an expensive join;
 * they are always needed together with the tweet content. Model entities
 * after what you want to see when you display them.
 */
String tweetMakerName;
String tweetMakerDisplayName;
/**
 * USE CASE
 * 1) retrieve the tweets MADE by a given user
 * 2) reach the author's User entity when it is really needed
 * (for example, to decrease the user's tweet counter when this tweet is removed)
 *
 * INDEXED
 */
Key tweetMakerId;
/**
 * USE CASE : display the tweet's subscribers on the "tweet page".
 *
 * Same idea as UserSubscriberChildEntity: subscriber data is duplicated so
 * it can be read quickly.
 * NOTE(review): TweetSubscriberChildEntity is not declared in the visible
 * snippet — presumably it mirrors UserSubscriberChildEntity; confirm.
 */
List<TweetSubscriberChildEntity> subscribers;
}
公共类用户实体{
@Id密钥Id;
字符串名;
/**索引:按显示名称检索用户*/
字符串显示名;
/**为了下面的例子*/
整数计数;
/**
*用例:从用户的“个人资料”页面查看用户的追随者。
*
*从用户实体轻松获取订阅者数据。
*UserSubscriberEntity中重复UserEntity(此对象)的数据。
*您只需要使用用户id在UserSubscriberEntity上运行祖先查询。
*/
列出订户名单;
}
/**此实体中存在重复的用户数据,可通过祖先查询轻松检索*/
公共类UserSubscriberChildEntity{
/**此实体的id*/
@Id密钥subscriberId;
/**复制您的用户实体数据*/
字符串名;
字符串显示名;
/**引用的UserEntity的id*/
字符串用户标识;
}
公共类tweet实体{
@Id密钥Id;
/**
*实际的文本消息
*/
字符串内容;
/**
*用例:在tweet内容旁边显示tweet生成器名称。
*
*复制用户数据以防止在不需要时进行昂贵的连接。
*您将始终需要将其与tweet内容一起显示!
*根据显示实体时希望看到的内容对实体进行建模
*/
字符串tweetMakerName;
字符串tweetMakerDisplayName;
/**
*用例
*1)检索给定用户发出的推文
*2)如果您实际需要访问用户实体
*(例如,如果删除此tweet并希望减少用户tweet计数器)
*
*索引
*/
关键字:makerid;
/**
*用例:从“tweet页面”显示tweet订户
*
*与“UserSubscriberChildEntity”相同,通过复制快速检索数据
*/
列出订户名单;
}
现在的核心问题是:
如何检索“某个用户订阅的所有推文”?
做法是把订阅关系拆分(分片)到多个实体中:
/**
 * USE CASE : retrieve the tweets one user subscribed to.
 *
 * One shard links a single tweet to a bounded batch of subscriber keys; a
 * tweet with many subscribers is spread over several shards. The same
 * approach applies to user subscriptions.
 */
public class TweetSubscriptionShardedEntity {
    /** unused */
    @Id Key shardKey;
    /** INDEXED : Tweet reference */
    Key tweetId;
    /**
     * INDEXED : Users reference.
     * Initialised eagerly so that a freshly created shard can accept
     * subscribers without a NullPointerException.
     */
    List<Key> userKeys = new ArrayList<Key>();
    /**
     * INDEXED : subscriber count, used to find shards that are still under
     * the 20K limitation mentioned by the author.
     */
    int subscribersCount = 0;

    /**
     * Adds a subscriber and increments the subscriber count.
     *
     * @param userId key of the subscribing user
     */
    public void addSubscriber(Key userId) {
        // The list may have been reset to null after construction (e.g. by a
        // persistence layer that does not store empty collections — TODO confirm).
        if (userKeys == null) {
            userKeys = new ArrayList<Key>();
        }
        userKeys.add(userId);
        subscribersCount++;
    }

    /** @return the key of the tweet this shard belongs to */
    public Key getTweetId() {
        return tweetId;
    }

    /**
     * Sets the tweet this shard belongs to.
     *
     * @param tweetId key of the tweet
     */
    public void setTweet(Key tweetId) {
        this.tweetId = tweetId;
    }

    /**
     * Returns the live list of subscriber keys. Prefer
     * {@link #addSubscriber(Key)} for additions: adding to this list directly
     * does not update the subscriber count.
     *
     * @return the subscriber keys stored in this shard, never {@code null}
     */
    public List<Key> getUserKeys() {
        if (userKeys == null) {
            userKeys = new ArrayList<Key>();
        }
        return userKeys;
    }
}
/**
* Pseudo code
*/
public class TweetService {
public List<TweetEntity> getTweetsSubscribed(Key userId) {
List<TweetEntity> tweetsFollowed = new ArrayList<TweetEntity>;
// Get all the subscriptions from a user
List<TweetSubscriberShardedEntity> shards = datastoreService.find("from TweetSubscriberShardedEntity where userKeys contains (userId)");
// Iterate over each subscription to retrieve the complete Tweet
for (TweetSubscriberShardedEntity shard : shards) {
TweetEntity tweet = datastoreService.get(TweetEntity.class, shard.getTweetId);
tweetsFollowed.add(tweet);
}
return tweetsFollowed;
}
public void subscribeToTweet(Key subscriberId, Key tweetId) {
TweetSubscriberShardedEntity shardToUse = null;
// Only get the first shard with under 20000 subscribers
TweetSubscriberShardedEntity shardNotFull = datastoreService.find("
FROM TweetSubscriberShardedEntity
WHERE tweetId == tweetId
AND userKeys contains (subscriberId)
AND subscribersCount < 20000
LIMIT 1");
if (shardNotFull == null) {
// If no shard exist create one
shardToUse = new TweetSubscriberShardedEntity();
}
else {
shardToUse = shardNotFull;
}
// Link user and tweet
shardToUse.setTweet(tweetId);
shardToUse.getUserKeys().add(subscriberId);
// Save shard
datastoreService.put(shardToUse);
}
/**
* Hard to put in a transaction with so many entities updated !
* See cross entity group docs for more info.
*/
public void createTweet(UserEntity creator, TweetEntity newTweet) {
creator.tweetCount++;
newTweet.tweetMakerName = creator.name;
newTweet.tweetMakerDisplayName = creator.displayName;
newTweet.tweetMakerId = creator.id;
// Duplicate User subscribers to Tweet
for(UserSubscriberChildEntity userSubscriber : creator.subcribers) {
// Create a Tweet child entity
TweetSubscriberChildEntity tweetSubscriber = new TweetSubscriberChildEntity();
tweetSubscriber.name = userSubscriber.name;
// ... (duplicate all data)
newTweet.add(tweetSubscriber);
// Create a shard with the previous method !!
subscribeToTweet(newTweet.id, subscriber.id);
}
// Update the user (tweet count)
datastoreService.put(creator);
// Create the new tweet and child entities (duplicated subscribers data)
datastoreService.put(newTweet);
}
}
/**
*用例:检索一个用户订阅的tweet
*
*用户订阅也是如此
*/
公共类TweetSubscriptionShardedEntity{
/**未使用*/
@Id密钥shardKey;
/**索引:Tweet引用*/
关键字tweetId;
/**索引:用户参考*/
列出用户密钥;
/**索引:订户计数,用于检索实际低于20K限制的碎片*/
int subscribersCount=0;
/**
*添加订阅服务器并增加订阅服务器计数
*/
public void addSubscriber(密钥用户ID){
userKeys.add(userId);
subscribersCount++;
}
}
将所有内容连接在一起的示例推文服务:
/**
 * USE CASE : retrieve the tweets one user subscribed to.
 *
 * One shard links a single tweet to a bounded batch of subscriber keys; a
 * tweet with many subscribers is spread over several shards. The same
 * approach applies to user subscriptions.
 */
public class TweetSubscriptionShardedEntity {
    /** unused */
    @Id Key shardKey;
    /** INDEXED : Tweet reference */
    Key tweetId;
    /**
     * INDEXED : Users reference.
     * Initialised eagerly so that a freshly created shard can accept
     * subscribers without a NullPointerException.
     */
    List<Key> userKeys = new ArrayList<Key>();
    /**
     * INDEXED : subscriber count, used to find shards that are still under
     * the 20K limitation mentioned by the author.
     */
    int subscribersCount = 0;

    /**
     * Adds a subscriber and increments the subscriber count.
     *
     * @param userId key of the subscribing user
     */
    public void addSubscriber(Key userId) {
        // The list may have been reset to null after construction (e.g. by a
        // persistence layer that does not store empty collections — TODO confirm).
        if (userKeys == null) {
            userKeys = new ArrayList<Key>();
        }
        userKeys.add(userId);
        subscribersCount++;
    }

    /** @return the key of the tweet this shard belongs to */
    public Key getTweetId() {
        return tweetId;
    }

    /**
     * Sets the tweet this shard belongs to.
     *
     * @param tweetId key of the tweet
     */
    public void setTweet(Key tweetId) {
        this.tweetId = tweetId;
    }

    /**
     * Returns the live list of subscriber keys. Prefer
     * {@link #addSubscriber(Key)} for additions: adding to this list directly
     * does not update the subscriber count.
     *
     * @return the subscriber keys stored in this shard, never {@code null}
     */
    public List<Key> getUserKeys() {
        if (userKeys == null) {
            userKeys = new ArrayList<Key>();
        }
        return userKeys;
    }
}
/**
* Pseudo code
*/
public class TweetService {
public List<TweetEntity> getTweetsSubscribed(Key userId) {
List<TweetEntity> tweetsFollowed = new ArrayList<TweetEntity>;
// Get all the subscriptions from a user
List<TweetSubscriberShardedEntity> shards = datastoreService.find("from TweetSubscriberShardedEntity where userKeys contains (userId)");
// Iterate over each subscription to retrieve the complete Tweet
for (TweetSubscriberShardedEntity shard : shards) {
TweetEntity tweet = datastoreService.get(TweetEntity.class, shard.getTweetId);
tweetsFollowed.add(tweet);
}
return tweetsFollowed;
}
public void subscribeToTweet(Key subscriberId, Key tweetId) {
TweetSubscriberShardedEntity shardToUse = null;
// Only get the first shard with under 20000 subscribers
TweetSubscriberShardedEntity shardNotFull = datastoreService.find("
FROM TweetSubscriberShardedEntity
WHERE tweetId == tweetId
AND userKeys contains (subscriberId)
AND subscribersCount < 20000
LIMIT 1");
if (shardNotFull == null) {
// If no shard exist create one
shardToUse = new TweetSubscriberShardedEntity();
}
else {
shardToUse = shardNotFull;
}
// Link user and tweet
shardToUse.setTweet(tweetId);
shardToUse.getUserKeys().add(subscriberId);
// Save shard
datastoreService.put(shardToUse);
}
/**
* Hard to put in a transaction with so many entities updated !
* See cross entity group docs for more info.
*/
public void createTweet(UserEntity creator, TweetEntity newTweet) {
creator.tweetCount++;
newTweet.tweetMakerName = creator.name;
newTweet.tweetMakerDisplayName = creator.displayName;
newTweet.tweetMakerId = creator.id;
// Duplicate User subscribers to Tweet
for(UserSubscriberChildEntity userSubscriber : creator.subcribers) {
// Create a Tweet child entity
TweetSubscriberChildEntity tweetSubscriber = new TweetSubscriberChildEntity();
tweetSubscriber.name = userSubscriber.name;
// ... (duplicate all data)
newTweet.add(tweetSubscriber);
// Create a shard with the previous method !!
subscribeToTweet(newTweet.id, subscriber.id);
}
// Update the user (tweet count)
datastoreService.put(creator);
// Create the new tweet and child entities (duplicated subscribers data)
datastoreService.put(newTweet);
}
}
/**
*伪码
*/
公共类推特服务{
公共列表getTweetsSubscribed(关键用户ID){
List tweetsFollowed=新建ArrayList;
//从用户处获取所有订阅
List shards=datastoreService.find(“来自TweetSubscriberShardedEntity,其中userKeys包含(userId)”;
//迭代每个订阅以检索完整的Tweet
for(TweetSubscriberShardedEntity碎片:碎片){
TweetEntity tweet=datastoreService.get(TweetEntity.class,shard.getTweetId);
tweetsFollowed.add(tweet);
}
返回允许的推文;
}
public void subscribeTweet(密钥subscriberId