Javascript TensorFlow.js和复杂数据集?

Javascript TensorFlow.js和复杂数据集?,javascript,tensorflow,tensorflow.js,Javascript,Tensorflow,Tensorflow.js,我正在着手开发一个利用TensorFlow(尤其是TensorFlow.js库)的应用程序 如果我只有两个数据轴(进程、感知皮肤外观),我已经遍历了并使其工作 // Visualize Data ========================================================== // function CreateModel() { // Create a sequential model const model = tf.sequential

我正在着手开发一个利用TensorFlow(尤其是TensorFlow.js库)的应用程序

如果只有两个数据维度(progression 和 perceivedSkinAppearance),我已经照着教程完整走了一遍,并且让它正常工作了:

// Visualize Data ========================================================== //

/**
 * Assembles the untrained network: two dense layers stacked in sequence,
 * the first accepting one input feature, each producing a single unit.
 *
 * @returns {tf.Sequential} The assembled model (not yet compiled).
 */
function CreateModel() {
    const network = tf.sequential();

    // Layer configurations in stacking order; only the first declares an input shape.
    const layerConfigs = [
        { inputShape: [1], units: 1, useBias: true },
        { units: 1, useBias: true },
    ];

    for (const config of layerConfigs) {
        network.add(tf.layers.dense(config));
    }

    return network;
}





/**
 * Converts the raw records into min-max normalized training tensors.
 *
 * `progression` becomes the single input column and
 * `perceivedSkinAppearance` the single label column.
 *
 * @param {Array<Object>} data Records to convert. NOTE: shuffled in place.
 * @returns {{inputs, labels, inputMax, inputMin, labelMax, labelMin}}
 *     Normalized tensors plus the bounds needed to undo the scaling later.
 */
function ConvertToTensor(data) {
    return tf.tidy(() => {
        // Randomize record order; this reorders the caller's array.
        tf.util.shuffle(data);

        const progressionValues = data.map(record => parseInt(record.progression));
        const appearanceValues = data.map(record => parseInt(record.perceivedSkinAppearance));

        // One row per record, one column per tensor.
        const inputTensor = tf.tensor2d(progressionValues, [progressionValues.length, 1]);
        const labelTensor = tf.tensor2d(appearanceValues, [appearanceValues.length, 1]);

        console.log(inputTensor);
        console.log(labelTensor);

        // Min-max scaling: (value - low) / (high - low).
        const rescale = (tensor, low, high) => tensor.sub(low).div(high.sub(low));

        const inputMax = inputTensor.max();
        const inputMin = inputTensor.min();
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        return {
            inputs: rescale(inputTensor, inputMin, inputMax),
            labels: rescale(labelTensor, labelMin, labelMax),
            inputMax,
            inputMin,
            labelMax,
            labelMin,
        };
    });
}




/**
 * Compiles the model (Adam optimizer, mean-squared-error loss) and fits it
 * to the given tensors, reporting loss/mse to a tfjs-vis panel per epoch.
 *
 * @param {Object} model  Model to compile and train.
 * @param {Object} inputs Training inputs passed straight to `model.fit`.
 * @param {Object} labels Training targets passed straight to `model.fit`.
 * @returns {Promise<*>} Whatever `model.fit` resolves to.
 */
async function TrainModel(model, inputs, labels) {
    const compileOptions = {
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
    };
    model.compile(compileOptions);

    // Live chart of the tracked metrics, refreshed at the end of every epoch.
    const progressCallbacks = tfvis.show.fitCallbacks(
        { name: 'Training Performance' },
        ['loss', 'mse'],
        { height: 200, callbacks: ['onEpochEnd'] }
    );

    const history = await model.fit(inputs, labels, {
        batchSize: 32,
        epochs: 50,
        shuffle: true,
        callbacks: progressCallbacks,
    });
    return history;
}





/**
 * Plots the trained model's predictions next to the original records.
 *
 * The model is evaluated on 100 evenly spaced normalized inputs in [0, 1];
 * both the inputs and the predictions are then mapped back to original
 * units by inverting the min-max scaling.
 *
 * @param {Object} model             Trained model exposing `predict`.
 * @param {Array<Object>} inputData  Raw records (`progression`, `perceivedSkinAppearance`).
 * @param {Object} normalizationData Holds `inputMax`, `inputMin`, `labelMax`, `labelMin`.
 */
function TestModel(model, inputData, normalizationData) {
    const { inputMax, inputMin, labelMin, labelMax } = normalizationData;
    const sampleCount = 100;

    const [sweepXs, sweepYs] = tf.tidy(() => {
        // Inverse of min-max scaling: value * (high - low) + low.
        const denormalize = (tensor, low, high) => tensor.mul(high.sub(low)).add(low);

        const unitXs = tf.linspace(0, 1, sampleCount);
        const unitPredictions = model.predict(unitXs.reshape([sampleCount, 1]));

        return [
            denormalize(unitXs, inputMin, inputMax).dataSync(),
            denormalize(unitPredictions, labelMin, labelMax).dataSync(),
        ];
    });

    const predictedPoints = Array.from(sweepXs, (x, index) => ({ x, y: sweepYs[index] }));

    const originalPoints = inputData.map(record => {
        const x = parseInt(record.progression);
        const y = parseInt(record.perceivedSkinAppearance);
        return { x, y };
    });

    const surface = { name: 'Model Predictions vs Original Data' };
    const plotData = { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] };
    const plotOptions = {
        xLabel: 'Progression',
        yLabel: 'Perceived Skin Appearance',
        height: 300
    };
    tfvis.render.scatterplot(surface, plotData, plotOptions);
}





/**
 * End-to-end driver: loads the records, plots them, then builds, trains
 * and evaluates the model, rendering each stage with tfjs-vis.
 *
 * @returns {Promise<void>} Resolves once the prediction plot has been rendered.
 */
async function VisualizeData() {
    // 86400000 is the number of milliseconds in one day.
    // NOTE(review): presumably `progression` is in milliseconds - confirm.
    const MS_PER_DAY = 86400000;

    const records = await appData.read("conditions", "created");

    // Scatter plot of the raw data the model is about to be trained on.
    const values = records.map(({ progression, perceivedSkinAppearance }) => ({
        x: (progression / MS_PER_DAY),
        y: perceivedSkinAppearance
    }));
    const rawPlotOptions = {
        xLabel: 'Progression',
        yLabel: 'Appearance',
        height: 300
    };
    tfvis.render.scatterplot({ name: 'Skin Appearance vs Progression' }, { values }, rawPlotOptions);

    const model = CreateModel();
    tfvis.show.modelSummary({ name: 'Model Summary' }, model);

    // The full conversion result is kept: TestModel needs the scaling bounds.
    const tensorData = ConvertToTensor(records);

    await TrainModel(model, tensorData.inputs, tensorData.labels);
    console.log('Done Training');

    TestModel(model, records, tensorData);
}
但如果数据集复杂得多,我就不太明白该如何处理了,例如:

  {
  "a25bfa27-4447-3a54-d2c5-29685b0dbed3" : {
    "affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "79916df1-99d8-4ec6-8bc0-531c9c9725c8", "23a220e8-cfff-4dd0-87c3-066f11d99506", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
    "created" : "2019-07-29 18:58:37",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 18,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MTgzMDM=",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTI0OTk=",
    "notes" : "",
    "observed" : "2019-07-29 18:58:00",
    "observer" : "b2JzZXJ2ZXJ8fHw0WDlqT1Nlem10U0ltVkdRRWk4MEZKZHRoMEsz",
    "perceivedSkinAppearance" : "3",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 186544718618,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 109.4,
    "temperatureMinimum" : 102.99,
    "temperatureObserved" : 106.21,
    "triggersEncountered" : [ "1cfb8826-58ad-4168-905c-6f6150d3618e", "928915de-aadc-45e4-b386-4df7fcbf9787" ],
    "uvIndexObserved" : 11.31
  },
  "d6604849-a6ed-0fef-4541-ba6b65e8ffa2" : {
    "affectedAreas" : [ "361106d9-5bc1-42ab-a52d-8b23eb2ed923", "b0b72048-393f-4980-b649-c764aed50c1d", "3df1c2a4-a7d5-4a8f-8753-eef9d3c44e76" ],
    "created" : "2019-07-17 15:43:46",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 26,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDYyMTg2Mjg5NDQ3",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTE4MDEyMTY3NzIx",
    "notes" : "",
    "observed" : "2019-07-17 15:43:00",
    "observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
    "perceivedSkinAppearance" : "3",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 185496227507,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 106,
    "temperatureMinimum" : 100,
    "temperatureObserved" : 103.15,
    "triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
    "uvIndexObserved" : 11.57
  },
  "fe5e995d-8b89-c6a7-23b5-3fb27112a92b" : {
    "created" : "2019-06-30 16:13:26",
    "gender" : "Z2VuZGVyfHx8ZmVtYWxl",
    "humidityObserved" : 12,
    "locationLatitude" : "bG9jYXRpb25MYXRpdHVkZXx8fDMzLjI2MDY0Njc1MDIzMjAz",
    "locationLongitude" : "bG9jYXRpb25Mb25naXR1ZGV8fHwtMTExLjgwNTEyNTkxNDk3NTA0",
    "notes" : "",
    "observed" : "2019-06-30 16:13:00",
    "observer" : "b2JzZXJ2ZXJ8fHxGZkducU1tUVlGVE9QQUZ3Wjc3THpwMEFCNHMx",
    "perceivedSkinAppearance" : "1",
    "perceivedSkinSensation" : "3",
    "perceivedSkinTexture" : "3",
    "pollenCountObserved" : 0,
    "progression" : 184029207516,
    "subject" : "c3ViamVjdHx8fDg0NGRmNmQyLTBjOTUtNDE3ZS1hYWUxLTc5ZjUxNjM1OWMyMw==",
    "temperatureMaximum" : 105.01,
    "temperatureMinimum" : 95,
    "temperatureObserved" : 99.95,
    "triggersEncountered" : [ "f756a7af-6a3d-4e48-998d-d706eac68e09" ],
    "uvIndexObserved" : 11.28
  }
}
注意:那些明显经过编码(散列)的值在实际使用之前会先被解码,所以不必担心它们看起来是奇怪的数据类型。


更新:我已按照建议改用字典映射并更新了代码,但现在 CreateModel、TestModel 和 TrainModel 方法都会出错,因为模型显然不接受我的新数据所对应的 inputShape。

这是我的更新代码:

// Most recently assigned dictionary code. Codes start at 1; 0 is reserved for "no value".
let mappingIndex = 0;
// Every distinct string seen so far, stored as { Key: <numeric code>, Value: <string> }.
const mappingDictionary = [];

/**
 * Maps a categorical value (e.g. a GUID or an encoded string) to a stable
 * numeric code so it can be placed in a tensor.
 *
 * The same string always yields the same code; a string not seen before is
 * assigned the next unused code and recorded in `mappingDictionary`.
 * Matching is exact and case-sensitive.
 *
 * @param {*} stringToFind    Value to encode. Non-strings (such as arrays of
 *                            GUIDs) are encoded by their string form.
 * @param {string} [uniquePrepend] Optional namespace prefix so that equal
 *                            values from different fields get different codes.
 * @returns {number} The code for the value, or 0 when the value is missing/empty.
 */
function MapToDictionary(stringToFind, uniquePrepend) {
    // Missing values must still produce a number: `undefined` would become NaN in a tensor.
    if (!stringToFind) {
        return 0;
    }

    const lookupValue = String(uniquePrepend ? uniquePrepend + stringToFind : stringToFind);

    // Exact comparison against the stored string only; a substring or
    // case-folded match would merge distinct values or never match at all.
    const existingEntry = mappingDictionary.find(entry => entry.Value === lookupValue);

    let output;
    if (existingEntry) {
        output = existingEntry.Key;
    } else {
        mappingIndex += 1;
        mappingDictionary.push({ Key: mappingIndex, Value: lookupValue });
        output = mappingIndex;
    }

    console.log(`${lookupValue}: ${output}`);
    return output;
}


// Visualize Data ========================================================== //

/**
 * Builds the untrained regression network: a dense layer over the feature
 * vector followed by a single-unit dense output layer.
 *
 * `inputShape` describes ONE sample and must not include the number of
 * samples: a data tensor of shape [numSamples, 16] needs `inputShape: [16]`.
 *
 * @param {number} [numFeatures=16] Number of values in each input row.
 * @returns {tf.Sequential} The assembled model (not yet compiled).
 */
function CreateModel(numFeatures = 16) {
    // Create a sequential model
    const model = tf.sequential();

    // Add a single hidden layer that consumes one feature vector per sample
    model.add(tf.layers.dense({ inputShape: [numFeatures], units: 1, useBias: true }));

    // Add an output layer
    model.add(tf.layers.dense({ units: 1, useBias: true }));

    return model;
}





/**
 * Converts the raw records into normalized training tensors.
 *
 * Each record becomes one row of 16 numeric features (same column order as
 * before); the label is the record's `progression`. Every feature column is
 * min-max scaled independently, so a large-valued column cannot squash the
 * others to ~0.
 *
 * NOTE(review): `progression` is used both as an input column and as the
 * label, so the model is handed the answer it is asked to predict. Decide
 * which field is the real target and drop it from the inputs (and adjust
 * the model's feature count accordingly).
 *
 * @param {Array<Object>} data Records to convert. NOTE: shuffled in place.
 * @returns {{inputs, labels, inputMax, inputMin, labelMax, labelMin}}
 *     `inputs` is [numRecords, 16]; `inputMax`/`inputMin` hold one value per
 *     feature column; `labelMax`/`labelMin` are scalars.
 * @throws {Error} If `data` is not a non-empty array.
 */
function ConvertToTensor(data) {
    if (!Array.isArray(data) || data.length === 0) {
        throw new Error('ConvertToTensor: data must be a non-empty array');
    }

    // Parses a numeric field while keeping its decimals. Anything that cannot
    // be parsed (missing value, still-encoded string) becomes 0 so that no
    // NaN ever reaches the tensor; encoded fields must be decoded beforehand
    // to contribute real information.
    const toNumber = value => {
        const parsed = parseFloat(value);
        return Number.isFinite(parsed) ? parsed : 0;
    };

    // Converts a "YYYY-MM-DD HH:MM:SS" string (or an already numeric value)
    // to epoch milliseconds. parseInt would only keep the leading year.
    const toTimestamp = value => {
        if (typeof value === 'number') {
            return Number.isFinite(value) ? value : 0;
        }
        const parsed = Date.parse(String(value).replace(' ', 'T'));
        return Number.isFinite(parsed) ? parsed : 0;
    };

    // Dictionary-encodes a categorical value, falling back to 0 when the
    // lookup yields nothing usable.
    const toCategory = (value, prefix) => {
        const code = MapToDictionary(value, prefix);
        return typeof code === 'number' && Number.isFinite(code) ? code : 0;
    };

    // Min-max scaling that is safe for constant data: where max === min the
    // divisor becomes 1 (instead of 0), so the scaled value is 0 rather than NaN.
    const scale = (tensor, min, max) => {
        const range = max.sub(min);
        const safeRange = range.add(range.equal(0).cast('float32'));
        return tensor.sub(min).div(safeRange);
    };

    return tf.tidy(() => {
        // Shuffle the data (reorders the caller's array)
        tf.util.shuffle(data);

        console.log(data);

        // Convert data to Tensor
        const inputs = data.map(d => [
            toCategory(d.affectedAreas, "affectedAreas"),
            toCategory(d.gender, "gender"),
            toNumber(d.humidityObserved),
            toNumber(d.locationLatitude),
            toNumber(d.locationLongitude),
            toTimestamp(d.observed),
            toNumber(d.perceivedSkinAppearance),
            toNumber(d.perceivedSkinSensation),
            toNumber(d.perceivedSkinTexture),
            toNumber(d.progression),
            toCategory(d.subject, "subject"),
            toNumber(d.temperatureMaximum),
            toNumber(d.temperatureMinimum),
            toNumber(d.temperatureObserved),
            toCategory(d.triggersEncountered, "triggersEncountered"),
            toNumber(d.uvIndexObserved)
        ]);
        const labels = data.map(d => toNumber(d.progression));

        const inputTensor = tf.tensor2d(inputs);
        const labelTensor = tf.tensor2d(labels, [labels.length, 1]);

        // Per-column bounds for the features (reduce over the sample axis)...
        const inputMax = inputTensor.max(0);
        const inputMin = inputTensor.min(0);
        // ...and scalar bounds for the single label column.
        const labelMax = labelTensor.max();
        const labelMin = labelTensor.min();

        // Normalize the data to the range 0 - 1 using min-max scaling
        const normalizedInputs = scale(inputTensor, inputMin, inputMax);
        const normalizedLabels = scale(labelTensor, labelMin, labelMax);

        return {
            inputs: normalizedInputs,
            labels: normalizedLabels,
            inputMax,
            inputMin,
            labelMax,
            labelMin,
        };
    });
}




/**
 * Configures the model for regression training and runs the fit loop.
 *
 * Uses the Adam optimizer with mean-squared-error as the loss, tracks 'mse'
 * as a metric, and streams loss/mse to a tfjs-vis chart after each epoch.
 *
 * @param {Object} model  Model to compile and train.
 * @param {Object} inputs Training inputs handed to `model.fit`.
 * @param {Object} labels Training targets handed to `model.fit`.
 * @returns {Promise<*>} The value `model.fit` resolves to.
 */
async function TrainModel(model, inputs, labels) {
    const BATCH_SIZE = 32;
    const EPOCH_COUNT = 50;
    const trackedMetrics = ['loss', 'mse'];

    model.compile({
        optimizer: tf.train.adam(),
        loss: tf.losses.meanSquaredError,
        metrics: ['mse'],
    });

    const chartSurface = { name: 'Training Performance' };
    const chartOptions = { height: 200, callbacks: ['onEpochEnd'] };
    const fitOptions = {
        batchSize: BATCH_SIZE,
        epochs: EPOCH_COUNT,
        shuffle: true,
        callbacks: tfvis.show.fitCallbacks(chartSurface, trackedMetrics, chartOptions),
    };

    return await model.fit(inputs, labels, fitOptions);
}





/**
 * Plots how well the trained model reproduces the training labels.
 *
 * With more than one input feature a one-dimensional sweep cannot be fed to
 * the model (a [100, 1] tensor does not match the model's input width), so
 * the model is run on the normalized training inputs instead and each
 * prediction is plotted against the actual label: the 'original' series is
 * the y = x reference, the 'predicted' series shows the model's output.
 *
 * With a single input feature (or when `normalizationData` carries no
 * `inputs`/`labels`) the previous behavior is kept: predictions for 100
 * evenly spaced inputs are drawn over the raw records.
 *
 * @param {Object} model             Trained model exposing `predict`.
 * @param {Array<Object>} inputData  Raw records; only read on the single-feature path.
 * @param {Object} normalizationData Holds `labelMin`/`labelMax`, plus either
 *     `inputs` and `labels` (multi-feature path) or `inputMin`/`inputMax`
 *     (single-feature path).
 */
function TestModel(model, inputData, normalizationData) {
    const { inputs, labels, inputMax, inputMin, labelMin, labelMax } = normalizationData;

    // Inverse of the min-max scaling: value * (max - min) + min.
    const unNormalize = (tensor, min, max) => tensor.mul(max.sub(min)).add(min);

    // Width of one input row; treated as 1 when no input tensor was supplied.
    const featureCount = inputs ? inputs.shape[inputs.shape.length - 1] : 1;
    const isMultiFeature = Boolean(inputs && labels && featureCount > 1);

    let originalPoints;
    let predictedPoints;

    if (isMultiFeature) {
        // Predict on the real (normalized) samples and compare with their labels.
        const [actual, preds] = tf.tidy(() => [
            unNormalize(labels, labelMin, labelMax).dataSync(),
            unNormalize(model.predict(inputs), labelMin, labelMax).dataSync(),
        ]);

        originalPoints = Array.from(actual, value => ({ x: value, y: value }));
        predictedPoints = Array.from(actual, (value, i) => ({ x: value, y: preds[i] }));
    } else {
        // Generate predictions for a uniform range of numbers between 0 and 1,
        // then un-normalize both axes.
        const [xs, preds] = tf.tidy(() => {
            const unitXs = tf.linspace(0, 1, 100);
            const unitPreds = model.predict(unitXs.reshape([100, 1]));

            return [
                unNormalize(unitXs, inputMin, inputMax).dataSync(),
                unNormalize(unitPreds, labelMin, labelMax).dataSync(),
            ];
        });

        predictedPoints = Array.from(xs, (val, i) => ({ x: val, y: preds[i] }));
        originalPoints = inputData.map(d => ({
            x: parseInt(d.progression, 10),
            y: parseInt(d.perceivedSkinAppearance, 10)
        }));
    }

    tfvis.render.scatterplot(
        { name: 'Original vs. Predictions' },
        { values: [originalPoints, predictedPoints], series: ['original', 'predicted'] },
        {
            xLabel: 'Original',
            yLabel: 'Predicted',
            height: 300
        }
    );
}





/**
 * End-to-end driver: loads the records, plots them, converts them to
 * tensors, then builds, trains and evaluates the model, rendering each
 * stage with tfjs-vis.
 *
 * @returns {Promise<void>} Resolves once the prediction plot has been rendered.
 */
async function VisualizeData() {
    // Load and plot the original input data that we are going to train on.
    const data = await appData.read("conditions", "created");
    const values = data.map(d => ({
        // 86400000 is the number of milliseconds in one day.
        // NOTE(review): presumably `progression` is in milliseconds - confirm.
        x: (d.progression / 86400000),
        // The field arrives as a string (e.g. "3"); plot it as a number.
        y: Number(d.perceivedSkinAppearance)
    }));

    // Axis labels follow the plotted data: x is progression, y is the skin condition score.
    tfvis.render.scatterplot(
        { name: 'Skin Condition vs. Progression' },
        { values },
        {
            xLabel: 'Progression',
            yLabel: 'Condition',
            height: 300
        }
    );

    // Convert first so the model can be sized from the actual number of
    // feature columns instead of a hard-coded guess.
    const tensorData = ConvertToTensor(data);
    const { inputs, labels } = tensorData;

    const model = CreateModel(inputs.shape[1]);
    tfvis.show.modelSummary({ name: 'Model Summary' }, model);

    await TrainModel(model, inputs, labels);
    console.log('Done Training');

    TestModel(model, data, tensorData);
}
我得到的错误是:

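Uncaught (in promise) Error: Error when checking input: expected dense_Dense1_input to have 3 dimension(s). but got array with shape 3,16
(未捕获的 Promise 错误:检查输入时出错:期望 dense_Dense1_input 具有 3 个维度,但得到的数组形状为 3,16)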


TensorFlow.js 处理的是向量(张量)。即使在您的简单示例中,也是先从对象数组创建出向量(张量)的。

代码示例

在您的示例中,您正在使用以下代码(简化)创建秩2张量(二维):

const inputs = [1, 2, 3]; // 示例输入
const inputTensor = tf.tensor2d(inputs, [inputs.length, 1]); // 张量:[[1],[2],[3]]
下面的代码是另一种写法,它能更清楚地展示实际发生的事情。在这个例子中,第二个维度已经直接写进了 JavaScript 数组里,而不是像上面那样把形状作为第二个参数传入。

const inputs = [[1], [2], [3]];
const inputTensor = tf.tensor2d(inputs); // 张量:[[1],[2],[3]]
添加更多值

要向输入向量添加更多值,可以将它们添加到 `inputs` 变量中:

const inputs = [[1, 4], [2, 5], [3, 6]];
const inputTensor = tf.tensor2d(inputs); // 张量:[[1,4],[2,5],[3,6]]
在代码中,您可以在以下行中执行此操作:

const inputs = data.map(d => [
    parseInt(d.progression),
    parseInt(d.anotherValue),
    parseInt(d.thirdAttribute)
]);

这样每一行返回的就不再是单个值,而是一个包含三个值的数组。要让代码适配这三个值,必须相应地修改 `inputShape`(它描述的是单个样本的形状,此处应为 `[3]`,不包含样本数量)。关于数据类型,您仍然只能使用数字,这意味着输入的所有值都需要先转换为数字。

那么,输入里就不能有字符串值吗?例如,其中很多是 GUID,我该如何把它们转换成数字?也就是说,如果我需要模型根据特定主体(即某个 GUID)的输入来学习,该怎么办?@eat sleep code 不行,这种意义上的字符串是不能直接使用的。如何对字符串进行编码取决于您的用例;例如,可以使用字典为每个不同的字符串分配一个数字。@eat sleep code 有一些项目(链接缺失)可以替您承担这项工作,但它们都针对特定的用例。谢谢,这让我找到了正确的方向。不过还有一些问题,请参见上面更新后的问题。