Javascript TensorFlow.js和复杂数据集?
标签:javascript、tensorflow、tensorflow.js
我正在着手开发一个利用 TensorFlow(尤其是 TensorFlow.js 库)的应用程序。如果我只有两个数据轴(progression 即进展、perceivedSkinAppearance 即感知到的皮肤外观),我已经照着示例走了一遍并让它正常工作:
// Visualize Data ========================================================== //
/**
 * Builds the network used for the single-feature regression.
 *
 * The result is a `tf.sequential` stack of two dense layers: the first accepts
 * one value per sample (`inputShape: [1]`) and has one unit, the second is a
 * one-unit output layer. Both layers use a bias term. The model is returned
 * without being compiled.
 *
 * @returns {object} The sequential model with both layers added.
 */
function CreateModel() {
  const layerConfigs = [
    // Hidden layer: one scalar feature in, one unit out.
    { inputShape: [1], units: 1, useBias: true },
    // Output layer: a single predicted value.
    { units: 1, useBias: true },
  ];

  const network = tf.sequential();
  for (const config of layerConfigs) {
    network.add(tf.layers.dense(config));
  }
  return network;
}
/**
 * Turns the raw records into min-max scaled training tensors.
 *
 * Side effects: `data` is shuffled in place through `tf.util.shuffle`, and the
 * two unscaled tensors are written to the console.
 *
 * @param {Array<object>} data - Records exposing `progression` and
 *   `perceivedSkinAppearance`; both are read through `parseInt`.
 * @returns {object} `inputs` and `labels` (the scaled tensors) together with
 *   `inputMax`, `inputMin`, `labelMax` and `labelMin`, the bounds used for
 *   scaling so that callers can undo it later.
 */
function ConvertToTensor(data) {
  // Scales a tensor with its own overall minimum and maximum and hands the
  // bounds back alongside the scaled result.
  const scaleToUnitRange = (tensor) => {
    const max = tensor.max();
    const min = tensor.min();
    return { scaled: tensor.sub(min).div(max.sub(min)), max, min };
  };

  return tf.tidy(() => {
    // Randomise the record order before building the tensors.
    tf.util.shuffle(data);

    const progressionValues = data.map(({ progression }) => parseInt(progression));
    const appearanceValues = data.map(
      ({ perceivedSkinAppearance }) => parseInt(perceivedSkinAppearance)
    );

    // One column per tensor: shape [sampleCount, 1].
    const rawInputs = tf.tensor2d(progressionValues, [progressionValues.length, 1]);
    const rawLabels = tf.tensor2d(appearanceValues, [appearanceValues.length, 1]);
    console.log(rawInputs);
    console.log(rawLabels);

    const input = scaleToUnitRange(rawInputs);
    const label = scaleToUnitRange(rawLabels);

    return {
      inputs: input.scaled,
      labels: label.scaled,
      inputMax: input.max,
      inputMin: input.min,
      labelMax: label.max,
      labelMin: label.min,
    };
  });
}
/**
 * Compiles `model` and fits it to the given tensors.
 *
 * The model is compiled with the Adam optimizer and mean squared error as
 * both loss and reported metric, then trained for 50 epochs in batches of 32
 * with shuffling enabled. Progress is charted through the callbacks produced
 * by `tfvis.show.fitCallbacks`.
 *
 * @param {object} model - Model exposing `compile` and `fit`.
 * @param {object} inputs - Training inputs passed straight to `model.fit`.
 * @param {object} labels - Training targets passed straight to `model.fit`.
 * @returns {Promise<*>} Whatever `model.fit` resolves to.
 */
async function TrainModel(model, inputs, labels) {
  const compileOptions = {
    optimizer: tf.train.adam(),
    loss: tf.losses.meanSquaredError,
    metrics: ['mse'],
  };
  model.compile(compileOptions);

  // Chart loss and mse at the end of every epoch.
  const progressCallbacks = tfvis.show.fitCallbacks(
    { name: 'Training Performance' },
    ['loss', 'mse'],
    { height: 200, callbacks: ['onEpochEnd'] }
  );

  const history = await model.fit(inputs, labels, {
    batchSize: 32,
    epochs: 50,
    shuffle: true,
    callbacks: progressCallbacks,
  });
  return history;
}
/**
 * Plots the model's predictions against the original records.
 *
 * The model is evaluated on 100 evenly spaced values between 0 and 1. Both the
 * sampled inputs and the predictions are mapped back to their original scale
 * by inverting the min-max scaling, then drawn as the 'predicted' series next
 * to the 'original' series built from `inputData`.
 *
 * @param {object} model - Trained model exposing `predict`.
 * @param {Array<object>} inputData - Records exposing `progression` and
 *   `perceivedSkinAppearance`; both are read through `parseInt`.
 * @param {object} normalizationData - Object carrying the `inputMax`,
 *   `inputMin`, `labelMax` and `labelMin` tensors used during scaling.
 * @returns {void}
 */
function TestModel(model, inputData, normalizationData) {
  const { inputMax, inputMin, labelMin, labelMax } = normalizationData;
  const sampleCount = 100;

  const [xValues, yValues] = tf.tidy(() => {
    const scaledXs = tf.linspace(0, 1, sampleCount);
    // The model is fed one value per row: shape [sampleCount, 1].
    const scaledPredictions = model.predict(scaledXs.reshape([sampleCount, 1]));

    // Inverse of min-max scaling: value * (max - min) + min.
    const restoredXs = scaledXs.mul(inputMax.sub(inputMin)).add(inputMin);
    const restoredPredictions = scaledPredictions.mul(labelMax.sub(labelMin)).add(labelMin);

    // Read the numbers out before tidy releases the tensors.
    return [restoredXs.dataSync(), restoredPredictions.dataSync()];
  });

  const predictedPoints = Array.from(xValues, (x, index) => ({ x, y: yValues[index] }));
  const originalPoints = inputData.map((record) => {
    return {
      x: parseInt(record.progression),
      y: parseInt(record.perceivedSkinAppearance),
    };
  });

  const surface = { name: 'Model Predictions vs Original Data' };
  const plotData = {
    values: [originalPoints, predictedPoints],
    series: ['original', 'predicted'],
  };
  const plotOptions = {
    xLabel: 'Progression',
    yLabel: 'Perceived Skin Appearance',
    height: 300,
  };
  tfvis.render.scatterplot(surface, plotData, plotOptions);
}
/**
 * Runs the whole demo: load the records, plot them, then build, train and
 * evaluate the model.
 *
 * Steps, in order: read the records through `appData.read`, draw them as a
 * scatterplot, create the model and show its summary, convert the records to
 * tensors, train, log 'Done Training', and finally plot the predictions.
 *
 * @returns {Promise<void>} Resolves once the prediction plot has been drawn.
 */
async function VisualizeData() {
  const records = await appData.read("conditions", "created");

  // 86400000 is the number of milliseconds in one day.
  // NOTE(review): presumably `progression` is a millisecond duration, making x a day count - confirm.
  const scatterPoints = records.map(({ progression, perceivedSkinAppearance }) => ({
    x: progression / 86400000,
    y: perceivedSkinAppearance,
  }));
  tfvis.render.scatterplot(
    { name: 'Skin Appearance vs Progression' },
    { values: scatterPoints },
    { xLabel: 'Progression', yLabel: 'Appearance', height: 300 }
  );

  const network = CreateModel();
  tfvis.show.modelSummary({ name: 'Model Summary' }, network);

  // tensorData also carries the scaling bounds that TestModel needs later.
  const tensorData = ConvertToTensor(records);
  await TrainModel(network, tensorData.inputs, tensorData.labels);
  console.log('Done Training');

  TestModel(network, records, tensorData);
}
如果我有一个复杂得多的数据集,我很难理解如何解决这个问题,比如:
{
"a25bfa27-4447-3a54-d2c5-29685b0dbed3" : {
"affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "79916df1-99d8-4ec6-8bc0-531c9c9725c8", "23a220e8-cfff-4dd0-87c3-066f11d99506", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
"created" : "2019-07-29 18:58:37",
"gender" : "Z2VuZGVyfHx8ZmVtYWxl",
"humidityObserved" : 18,
"locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MTgzMDM=",
"locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTI0OTk=",
"notes" : "",
"observed" : "2019-07-29 18:58:00",
"observer" : "b2JzZXJ2ZXJ8fHw0WDlqT1Nlem10U0ltVkdRRWk4MEZKZHRoMEsz",
"perceivedSkinAppearance" : "3",
"perceivedSkinSensation" : "3",
"perceivedSkinTexture" : "3",
"pollenCountObserved" : 0,
"progression" : 186544718618,
"subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
"temperatureMaximum" : 109.4,
"temperatureMinimum" : 102.99,
"temperatureObserved" : 106.21,
"triggersEncountered" : [ "1cfb8826-58ad-4168-905c-6f6150d3618e", "928915de-aadc-45e4-b386-4df7fcbf9787" ],
"uvIndexObserved" : 11.31
},
"d6604849-a6ed-0fef-4541-ba6b65e8ffa2" : {
"affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "b0b72048-393f-4980-b649-c764aed50c1d", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
"created" : "2019-07-17 15:43:46",
"gender" : "Z2VuZGVyfHx8ZmVtYWxl",
"humidityObserved" : 26,
"locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDYyMTg2Mjg5NDQ3",
"locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTE4MDEyMTY3NzIx",
"notes" : "",
"observed" : "2019-07-17 15:43:00",
"observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
"perceivedSkinAppearance" : "3",
"perceivedSkinSensation" : "3",
"perceivedSkinTexture" : "3",
"pollenCountObserved" : 0,
"progression" : 185496227507,
"subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
"temperatureMaximum" : 106,
"temperatureMinimum" : 100,
"temperatureObserved" : 103.15,
"triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
"uvIndexObserved" : 11.57
},
"fe5e995d-8b89-c6a7-23b5-3fb27112a92b" : {
"created" : "2019-06-30 16:13:26",
"gender" : "Z2VuZGVyfHx8ZmVtYWxl",
"humidityObserved" : 12,
"locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDY0Njc1MDIzMjAz",
"locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTEyNTkxNDk3NTA0",
"notes" : "",
"observed" : "2019-06-30 16:13:00",
"observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
"perceivedSkinAppearance" : "1",
"perceivedSkinSensation" : "3",
"perceivedSkinTexture" : "3",
"pollenCountObserved" : 0,
"progression" : 184029207516,
"subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
"temperatureMaximum" : 105.01,
"temperatureMinimum" : 95,
"temperatureObserved" : 99.95,
"triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
"uvIndexObserved" : 11.28
}
}
注意:那些明显经过编码(看起来像散列)的值,会在实际使用之前先被还原(解码),
所以不必担心它们看起来是奇怪的数据类型。
更新:我按照建议的映射方式更新了代码,现在 CreateModel、TestModel 和 TrainModel 方法出现错误,因为模型的 inputShape 显然与我的新数据不匹配。这是我更新后的代码:
// Shared lookup state for MapToDictionary. Declared with `var`, as before, so
// the two names stay reachable exactly as they were for any other script.
// mappingIndex is the last id handed out; mappingDictionary holds one
// { Key: id, Value: string } entry per distinct string seen so far.
var mappingIndex = 0;
var mappingDictionary = [];

/**
 * Maps a string (or anything that stringifies, such as an array of ids) to a
 * stable positive integer id, creating a new id the first time a value is seen.
 *
 * Lookups compare the full string exactly. A falsy `stringToFind` (missing
 * field, empty string) maps to 0, which is never used as a real id.
 * Every successful mapping is written to the console as "<key>: <id>".
 *
 * @param {*} stringToFind - Value to encode; converted to a string.
 * @param {string} [uniquePrepend] - Optional namespace prefix, so that equal
 *   values from different fields receive different ids.
 * @returns {number} 0 for falsy input, otherwise the id assigned to the value.
 */
function MapToDictionary(stringToFind, uniquePrepend) {
  // Return 0 explicitly; falling off the end would yield undefined, which
  // becomes NaN once the result is placed in a numeric tensor.
  if (!stringToFind) {
    return 0;
  }

  const lookupKey = uniquePrepend
    ? String(uniquePrepend) + String(stringToFind)
    : String(stringToFind);

  // Exact comparison on Value only: substring or case-folded matching would
  // merge distinct strings or fail to find mixed-case ones.
  const existingEntry = mappingDictionary.find((entry) => entry.Value === lookupKey);

  let output;
  if (existingEntry) {
    output = existingEntry.Key;
  } else {
    mappingIndex += 1;
    mappingDictionary.push({ Key: mappingIndex, Value: lookupKey });
    output = mappingIndex;
  }

  console.log(`${lookupKey}: ${output}`);
  return output;
}
// Visualize Data ========================================================== //
/**
 * Builds the regression network for multi-feature input.
 *
 * `inputShape` describes a single sample and excludes the batch dimension.
 * The training data is a rank-2 tensor of shape [sampleCount, featureCount],
 * so each sample is a flat vector and the shape must be `[featureCount]`.
 * Declaring `[3, 16]` instead makes the layer expect rank-3 input and fails
 * input checking when a [3, 16] tensor is supplied.
 *
 * @param {number} [featureCount=16] - Number of values per sample; the default
 *   matches the 16 features a 16-column input row provides.
 * @returns {object} The sequential model with both layers added, not compiled.
 */
function CreateModel(featureCount = 16) {
  const model = tf.sequential();
  // Hidden layer: one flat feature vector per sample.
  model.add(tf.layers.dense({ inputShape: [featureCount], units: 1, useBias: true }));
  // Output layer: a single predicted value.
  model.add(tf.layers.dense({ units: 1, useBias: true }));
  return model;
}
/**
 * Turns the raw records into min-max scaled training tensors with 16 features
 * per record.
 *
 * String-valued fields are encoded through `MapToDictionary`; numeric fields
 * are parsed. Measurements that can carry a fractional part (humidity,
 * coordinates, temperatures, UV index) are parsed as decimals so that values
 * such as 109.4 are not truncated to 109; the remaining numeric fields are
 * parsed as base-10 integers.
 *
 * Side effects: `data` is shuffled in place through `tf.util.shuffle` and
 * written to the console.
 *
 * @param {Array<object>} data - Records to convert.
 * @returns {object} `inputs` and `labels` (the scaled tensors) together with
 *   `inputMax`, `inputMin`, `labelMax` and `labelMin`, the bounds used for
 *   scaling.
 */
function ConvertToTensor(data) {
  const toInteger = (value) => Number.parseInt(value, 10);
  const toDecimal = (value) => Number.parseFloat(value);

  return tf.tidy(() => {
    // Shuffle the data
    tf.util.shuffle(data);
    console.log(data);

    // One row of 16 numbers per record.
    const inputs = data.map((d) => [
      MapToDictionary(d.affectedAreas, "affectedAreas"),
      MapToDictionary(d.gender, "gender"),
      toDecimal(d.humidityObserved),
      toDecimal(d.locationLatitude),
      toDecimal(d.locationLongitude),
      // Only the leading integer of the value is kept, e.g. the year of a
      // 'YYYY-MM-DD hh:mm:ss' string.
      toInteger(d.observed),
      toInteger(d.perceivedSkinAppearance),
      toInteger(d.perceivedSkinSensation),
      toInteger(d.perceivedSkinTexture),
      toInteger(d.progression),
      MapToDictionary(d.subject, "subject"),
      toDecimal(d.temperatureMaximum),
      toDecimal(d.temperatureMinimum),
      toDecimal(d.temperatureObserved),
      MapToDictionary(d.triggersEncountered, "triggersEncountered"),
      toDecimal(d.uvIndexObserved),
    ]);
    // NOTE(review): `progression` is used as the label and is also one of the
    // input columns above - confirm this overlap is intended.
    const labels = data.map((d) => toInteger(d.progression));

    const inputTensor = tf.tensor2d(inputs);
    const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

    // Normalize the data to the range 0 - 1 using min-max scaling.
    // max()/min() are called without an axis, so one minimum and one maximum
    // taken over every column scale the whole input tensor.
    // NOTE(review): per-column bounds are probably wanted, but they would
    // change the shape of the returned inputMax/inputMin - confirm with callers.
    const inputMax = inputTensor.max();
    const inputMin = inputTensor.min();
    const labelMax = labelTensor.max();
    const labelMin = labelTensor.min();
    const normalizedInputs = inputTensor.sub(inputMin).div(inputMax.sub(inputMin));
    const normalizedLabels = labelTensor.sub(labelMin).div(labelMax.sub(labelMin));

    return {
      inputs: normalizedInputs,
      labels: normalizedLabels,
      inputMax,
      inputMin,
      labelMax,
      labelMin,
    };
  });
}
/**
 * Prepares `model` for training and runs the fit.
 *
 * Compilation uses the Adam optimizer with mean squared error as loss and
 * 'mse' as the tracked metric. Fitting runs 50 epochs with a batch size of 32
 * and shuffling turned on, reporting through `tfvis.show.fitCallbacks`.
 *
 * @param {object} model - Model exposing `compile` and `fit`.
 * @param {object} inputs - Training inputs handed to `model.fit` unchanged.
 * @param {object} labels - Training targets handed to `model.fit` unchanged.
 * @returns {Promise<*>} The value `model.fit` resolves to.
 */
async function TrainModel(model, inputs, labels) {
  const optimizer = tf.train.adam();
  model.compile({ optimizer, loss: tf.losses.meanSquaredError, metrics: ['mse'] });

  const trainingConfig = {
    batchSize: 32,
    epochs: 50,
    shuffle: true,
    // Draw the loss and mse curves after each epoch.
    callbacks: tfvis.show.fitCallbacks(
      { name: 'Training Performance' },
      ['loss', 'mse'],
      { height: 200, callbacks: ['onEpochEnd'] }
    ),
  };

  const fitResult = await model.fit(inputs, labels, trainingConfig);
  return fitResult;
}
/**
 * Draws the model's predictions next to the original records.
 *
 * 100 evenly spaced values between 0 and 1 are fed to the model; the sampled
 * values and the predictions are then un-normalized by inverting the min-max
 * scaling and plotted as the 'predicted' series beside the 'original' series
 * built from `inputData`.
 *
 * NOTE(review): the prediction input is reshaped to [100, 1], i.e. one feature
 * per sample. A model built for more features per sample will presumably
 * reject it - confirm against the model actually passed in.
 *
 * @param {object} model - Trained model exposing `predict`.
 * @param {Array<object>} inputData - Records exposing `progression` and
 *   `perceivedSkinAppearance`; both are read through `parseInt`.
 * @param {object} normalizationData - Object carrying the `inputMax`,
 *   `inputMin`, `labelMax` and `labelMin` tensors used during scaling.
 * @returns {void}
 */
function TestModel(model, inputData, normalizationData) {
  const { inputMax, inputMin, labelMin, labelMax } = normalizationData;

  // Inverse of min-max scaling: value * (max - min) + min.
  const unNormalize = (tensor, max, min) => tensor.mul(max.sub(min)).add(min);

  const [plotXs, plotYs] = tf.tidy(() => {
    const samples = tf.linspace(0, 1, 100);
    const rawPredictions = model.predict(samples.reshape([100, 1]));
    const xsInOriginalScale = unNormalize(samples, inputMax, inputMin);
    const predictionsInOriginalScale = unNormalize(rawPredictions, labelMax, labelMin);
    // Copy the numbers out before tidy disposes of the tensors.
    return [xsInOriginalScale.dataSync(), predictionsInOriginalScale.dataSync()];
  });

  const predictedPoints = Array.from(plotXs, (x, i) => ({ x, y: plotYs[i] }));
  const originalPoints = inputData.map(({ progression, perceivedSkinAppearance }) => ({
    x: parseInt(progression),
    y: parseInt(perceivedSkinAppearance),
  }));

  tfvis.render.scatterplot(
    { name: 'Original vs. Predictions' },
    { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] },
    { xLabel: 'Original', yLabel: 'Predicted', height: 300 }
  );
}
/**
 * Runs the whole demo: load the records, plot them, then build, train and
 * evaluate the model.
 *
 * Steps, in order: read the records through `appData.read`, draw them as a
 * scatterplot, create the model and show its summary, convert the records to
 * tensors, train, log 'Done Training', and finally plot the predictions.
 *
 * The axis labels of the first plot name what is actually plotted: x carries
 * the progression value and y the perceived skin appearance.
 *
 * @returns {Promise<void>} Resolves once the prediction plot has been drawn.
 */
async function VisualizeData() {
  // Load and plot the original input data that we are going to train on.
  const data = await appData.read("conditions", "created");

  // 86400000 is the number of milliseconds in one day.
  // NOTE(review): presumably `progression` is a millisecond duration, making x a day count - confirm.
  const values = data.map((d) => ({
    x: d.progression / 86400000,
    y: d.perceivedSkinAppearance,
  }));
  tfvis.render.scatterplot(
    { name: 'Skin Condition vs. Progression' },
    { values },
    {
      // x is the progression and y the condition rating, so label them so.
      xLabel: 'Progression',
      yLabel: 'Condition',
      height: 300,
    }
  );

  const model = CreateModel();
  tfvis.show.modelSummary({ name: 'Model Summary' }, model);

  // tensorData also carries the scaling bounds that TestModel needs later.
  const tensorData = ConvertToTensor(data);
  const { inputs, labels } = tensorData;
  await TrainModel(model, inputs, labels);
  console.log('Done Training');

  TestModel(model, data, tensorData);
}
我得到的错误是:
未捕获(在 Promise 中)的错误:检查输入时出错:期望 dense_Dense1_input 具有 3 个维度,但得到的数组形状为 3,16
TensorFlow.js 处理的是向量(张量)。即使在您的简单示例中,也是先从对象数组创建出向量(张量)的。
代码示例
在您的示例中,您使用以下(简化后的)代码创建了一个秩为 2 的(二维)张量:
const inputs = [1, 2, 3]; // 示例输入
const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]); // 张量:[[1],[2],[3]]
下面的代码是另一种编写方法,它可以使正在发生的事情更加明显。在本例中,我们已经将第二个维度添加到JavaScript数组中,可以选择将其作为第二个参数传递(如上所述)
const inputs = [[1], [2], [3]];
const inputTensor = tf.tensor2d(inputs); // 张量:[[1],[2],[3]]
添加更多值
要向输入向量添加更多值,只需把它们加入 inputs 变量:
const inputs = [[1, 4], [2, 5], [3, 6]];
const inputTensor = tf.tensor2d(inputs); // 张量:[[1,4],[2,5],[3,6]]
在代码中,您可以在以下行中执行此操作:
const inputs = data.map(d => [
  parseInt(d.progression),
  parseInt(d.anotherValue),
  parseInt(d.thirdAttribute)
]);
这样每一行返回的就不再是单个值,而是一个包含三个值的数组。为了让代码适配这三个值,现在必须相应地修改 inputShape。关于数据类型:您仍然只能使用数字,也就是说,所有输入值都需要先转换为数字。
评论:
- 所以不能有字符串值吗?例如,有一部分值是 GUID,我该如何把它们转换为数字?也就是说,如果我需要模型根据特定 subject(即 GUID)的输入来学习,该怎么办?
- @eat-sleep-code 不行,在这个意义上不能使用字符串。如何对字符串进行编码取决于您的用例。例如,您可以使用一个字典为每个字符串分配一个数字。
- @eat-sleep-code 有一些项目可以替您完成这部分工作,但它们都针对特定的用例。
- 谢谢,这让我找到了正确的方向。不过仍有一些问题,请参见上面更新后的问题。