用Python生成睡眠日志可视化

用Python生成睡眠日志可视化,python,data-visualization,Python,Data Visualization,我有一个睡眠时间信息的数据集,并希望使用Python对其进行可视化 我拥有的.csv数据集如下所示: SleepStartDate、SleepStartTime、SleepStopTime 17/03/2017,23:45,07:25 19/03/2017,01:05,09:10 2017年3月19日23:50,08:25 我想要制作的可视化应该类似于以下内容: 图像来源: 我知道这是一个非常简单的可视化,想象一下它被构建到了一些已经存在的库中,但我在谷歌上的最大努力就是找不到它。如果有人能给我

我有一个睡眠时间信息的数据集,并希望使用Python对其进行可视化

我拥有的 `.csv` 数据集如下所示:

SleepStartDate,SleepStartTime,SleepStopTime
17/03/2017,23:45,07:25
19/03/2017,01:05,09:10
19/03/2017,23:50,08:25

我想要制作的可视化应该类似于以下内容: 图像来源:

我知道这是一个非常简单的可视化,想必某个现有的库已经内置了这种图表,但我尽力用谷歌搜索也没能找到。如果有人能给我指出正确的方向,我将不胜感激。


提前感谢您的时间和智慧。

经典的Python选择是使用该软件包。查看示例图,它看起来像一个垂直条形图

我找不到具有所需功能的库,所以最后自己编写了一个脚本来实现:

脚本:

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import matplotlib.patches as patches
import datetime as dt
import csv
import sys

MINUTES_IN_DAY = 1440.0
COLUMN_COLOUR = 'b'

# Graph data using matplotlib visualization
def plotData(data,columnColour,maxDate,minDate):
    """Draw one column per calendar day with each sleep period shaded.

    The x-axis holds dates and the y-axis covers a single day, midnight at
    the bottom to midnight at the top. Periods are shaded with ``axvspan``,
    whose ``ymin``/``ymax`` are fractions of the axes height, so the
    y-limits are pinned to exactly one day to make those fractions line up
    with the hour ticks.

    Args:
        data: Iterable of objects exposing ``year``, ``month``, ``day`` and
            ``startTime``/``stopTime`` given as fractions of a day.
        columnColour: Face colour used for every shaded span.
        maxDate: Latest date in ``data`` (``datetime.datetime``).
        minDate: Earliest date in ``data`` (``datetime.datetime``).
    """
    oneDay = dt.timedelta(days=1)
    halfDay = dt.timedelta(days=0.5)
    hourLabels = ['Midnight','1am','2am','3am','4am','5am','6am','7am','8am',
                  '9am','10am','11am','Midday','1pm','2pm','3pm','4pm','5pm',
                  '6pm','7pm','8pm','9pm','10pm','11pm','Midnight']

    # Invisible background series with one point per day; it only serves to
    # turn the x-axis into a date axis spanning the data
    x = mpl.dates.drange(minDate, maxDate, oneDay)
    if len(x) == 0:
        # drange excludes its end point, so it is empty when every entry
        # falls on the same date
        x = np.array([mpl.dates.date2num(minDate)])

    # Whole-day number of the first date, used as an arbitrary base date so
    # that the y-axis starts at midnight
    base = int(x[0])
    times = x % 1 + base

    fig = plt.figure()
    fig.suptitle('Daily Sleep Patterns', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(111)

    # plot_date is deprecated; xaxis_date() followed by plot() is what it did
    ax.xaxis_date()
    ax.plot(x, times, 'o', color='w', visible=False)

    ax.yaxis_date()
    fig.autofmt_xdate()

    # Pin the y-axis to one full day and place exactly one tick per label,
    # so the tick count always matches the 25 hour labels
    ax.set_ylim(base, base + 1)
    ax.yaxis.set_ticks(base + np.arange(len(hourLabels)) / 24.0)
    ax.set_yticklabels(hourLabels)

    for item in data:
        # Each column is one day wide and centred on midnight of its date
        columnStart = dt.datetime(item.year, item.month, item.day) - halfDay
        columnEnd = columnStart + oneDay

        if item.startTime > item.stopTime:
            # Period runs past midnight: split it, shading from the start
            # time to the top of this column and from the bottom of the
            # next column up to the stop time
            ax.axvspan(xmin=columnStart, xmax=columnEnd, ymin=item.startTime, ymax=1, facecolor=columnColour, alpha=0.5)
            ax.axvspan(xmin=columnEnd, xmax=columnEnd + oneDay, ymin=0, ymax=item.stopTime, facecolor=columnColour, alpha=0.5)
        else:
            ax.axvspan(xmin=columnStart, xmax=columnEnd, ymin=item.startTime, ymax=item.stopTime, facecolor=columnColour, alpha=0.5)

    ax.set_ylabel('Hours',fontweight='bold')
    ax.grid(True)

    plt.show()

# Read data from csv file
def readDataFromFile(dataFile):
    """Read a CSV file and return all of its rows.

    Args:
        dataFile: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV row (header row included), each entry
        being the list of that row's string fields.
    """
    # The context manager closes the file even if parsing raises
    with open(dataFile,'rt') as f:
        return list(csv.reader(f))

# Class to store time and date data read from file
class sleepInstance(object):
    """One sleep period built from a CSV row.

    Attributes set by the constructor:
        day, month, year: Calendar date of the row, as ints.
        startTime, stopTime: Time of day as a fraction of a day
            (0.0 is midnight, 0.5 is midday).
    """

    # Accepted input layouts, tried in order. The first entry of each tuple
    # is the layout that was originally supported, so existing files parse
    # exactly as before; the second adds 4-digit years / times without seconds.
    DATE_FORMATS = ("%d/%m/%y", "%d/%m/%Y")
    TIME_FORMATS = ("%H:%M:%S", "%H:%M")

    def __init__(self,listOfInputLists):
        """Build from one row of strings: ``[date, start time, stop time]``."""
        self.day = 0
        self.month = 0
        self.year = 0
        self.formatDate(listOfInputLists[0])
        self.startTime = self.formatTime(listOfInputLists[1])
        self.stopTime = self.formatTime(listOfInputLists[2])

    @staticmethod
    def _parseWithFormats(text, formats):
        """Return ``text`` parsed by the first matching strptime format.

        Raises:
            ValueError: If ``text`` matches none of ``formats``.
        """
        for fmt in formats:
            try:
                return dt.datetime.strptime(text, fmt)
            except ValueError:
                continue
        raise ValueError("%r does not match any of the formats: %s"
                         % (text, ", ".join(formats)))

    # Extracts date information variables
    def formatDate(self,unformattedDate):
        """Parse a date string and store its day, month and year on self.

        Raises:
            ValueError: If the string matches none of ``DATE_FORMATS``.
        """
        date = self._parseWithFormats(unformattedDate, self.DATE_FORMATS)
        self.day = date.day
        self.month = date.month
        self.year = date.year

    # Formats time as a decimal fraction of day, for use in graph
    def formatTime(self,unformattedTime):
        """Return a time-of-day string as a fraction of a day.

        Raises:
            ValueError: If the string matches none of ``TIME_FORMATS``.
        """
        timeSinceMidnight = self._parseWithFormats(unformattedTime, self.TIME_FORMATS)
        # strptime fills in 1900-01-01 when the format carries no date part
        midnight = dt.datetime(1900,1,1)
        minutesSinceMidnight = (timeSinceMidnight - midnight).total_seconds() / 60.0
        return minutesSinceMidnight / MINUTES_IN_DAY

# Formats data read from file as a list of sleepInstance objects
def formatDataForPlot(listOfInputLists):
    """Convert parsed CSV rows into a list of sleepInstance objects.

    The first row is treated as the header and skipped. Empty rows, which
    csv.reader produces for blank lines in the file, are skipped too so
    that they do not fail when indexed for their date and time fields.

    Args:
        listOfInputLists: List of rows, each a list of string fields.

    Returns:
        A list with one sleepInstance per non-empty data row.
    """
    return [sleepInstance(row) for row in listOfInputLists[1:] if row]

# Extracts earliest (min) and latest (max) dates from data, for use in setting graph limits
def getMaxAndMinDates(plotDataList):
    """Return the latest and earliest dates found in the data.

    Args:
        plotDataList: Iterable of objects exposing ``year``, ``month`` and
            ``day``.

    Returns:
        A ``(maxDate, minDate)`` tuple of ``datetime.datetime`` values at
        midnight. Note the order: the latest date comes first.

    Raises:
        ValueError: If ``plotDataList`` is empty.
    """
    dateTimeList = [dt.datetime(item.year, item.month, item.day)
                    for item in plotDataList]
    if not dateTimeList:
        # Same exception type max() would raise, with a message that names
        # the actual problem
        raise ValueError("no sleep data: cannot determine the date range")
    return max(dateTimeList), min(dateTimeList)

# Script driver: runs at module level, so it executes on import as well
# Input path is relative to the current working directory
dataFile = 'sleepData.csv'
# Raw CSV rows, header row included
listOfInputLists = readDataFromFile(dataFile)
# One sleepInstance per data row (the header row is dropped here)
plotDataList = formatDataForPlot(listOfInputLists)
# getMaxAndMinDates returns the latest date first, then the earliest
maxDate, minDate = getMaxAndMinDates(plotDataList)
# Opens the chart window
plotData(plotDataList,COLUMN_COLOUR,maxDate,minDate)
Date,Start,Finish
17/03/17,03:15:00,03:55:00
17/03/17,06:20:00,06:35:00
17/03/17,09:00:00,09:40:00
17/03/17,13:10:00,13:35:00
17/03/17,15:45:00,16:30:00
17/03/17,18:45:00,19:25:00
17/03/17,21:15:00,21:35:00
18/03/17,00:30:00,02:00:00
18/03/17,04:50:00,05:05:00
18/03/17,08:20:00,08:40:00
18/03/17,12:30:00,13:10:00
18/03/17,16:30:00,17:00:00
18/03/17,18:45:00,19:00:00
18/03/17,20:30:00,21:00:00
19/03/17,00:00:00,12:00:00
19/03/17,18:00:00,23:59:00
19/03/17,13:00:00,14:00:00
20/03/17,12:00:00,11:00:00
输入:

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import matplotlib.patches as patches
import datetime as dt
import csv
import sys

MINUTES_IN_DAY = 1440.0
COLUMN_COLOUR = 'b'

# Graph data using matplotlib visualization
def plotData(data,columnColour,maxDate,minDate):
    """Draw one column per calendar day with each sleep period shaded.

    The x-axis holds dates and the y-axis covers a single day, midnight at
    the bottom to midnight at the top. Periods are shaded with ``axvspan``,
    whose ``ymin``/``ymax`` are fractions of the axes height, so the
    y-limits are pinned to exactly one day to make those fractions line up
    with the hour ticks.

    Args:
        data: Iterable of objects exposing ``year``, ``month``, ``day`` and
            ``startTime``/``stopTime`` given as fractions of a day.
        columnColour: Face colour used for every shaded span.
        maxDate: Latest date in ``data`` (``datetime.datetime``).
        minDate: Earliest date in ``data`` (``datetime.datetime``).
    """
    oneDay = dt.timedelta(days=1)
    halfDay = dt.timedelta(days=0.5)
    hourLabels = ['Midnight','1am','2am','3am','4am','5am','6am','7am','8am',
                  '9am','10am','11am','Midday','1pm','2pm','3pm','4pm','5pm',
                  '6pm','7pm','8pm','9pm','10pm','11pm','Midnight']

    # Invisible background series with one point per day; it only serves to
    # turn the x-axis into a date axis spanning the data
    x = mpl.dates.drange(minDate, maxDate, oneDay)
    if len(x) == 0:
        # drange excludes its end point, so it is empty when every entry
        # falls on the same date
        x = np.array([mpl.dates.date2num(minDate)])

    # Whole-day number of the first date, used as an arbitrary base date so
    # that the y-axis starts at midnight
    base = int(x[0])
    times = x % 1 + base

    fig = plt.figure()
    fig.suptitle('Daily Sleep Patterns', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(111)

    # plot_date is deprecated; xaxis_date() followed by plot() is what it did
    ax.xaxis_date()
    ax.plot(x, times, 'o', color='w', visible=False)

    ax.yaxis_date()
    fig.autofmt_xdate()

    # Pin the y-axis to one full day and place exactly one tick per label,
    # so the tick count always matches the 25 hour labels
    ax.set_ylim(base, base + 1)
    ax.yaxis.set_ticks(base + np.arange(len(hourLabels)) / 24.0)
    ax.set_yticklabels(hourLabels)

    for item in data:
        # Each column is one day wide and centred on midnight of its date
        columnStart = dt.datetime(item.year, item.month, item.day) - halfDay
        columnEnd = columnStart + oneDay

        if item.startTime > item.stopTime:
            # Period runs past midnight: split it, shading from the start
            # time to the top of this column and from the bottom of the
            # next column up to the stop time
            ax.axvspan(xmin=columnStart, xmax=columnEnd, ymin=item.startTime, ymax=1, facecolor=columnColour, alpha=0.5)
            ax.axvspan(xmin=columnEnd, xmax=columnEnd + oneDay, ymin=0, ymax=item.stopTime, facecolor=columnColour, alpha=0.5)
        else:
            ax.axvspan(xmin=columnStart, xmax=columnEnd, ymin=item.startTime, ymax=item.stopTime, facecolor=columnColour, alpha=0.5)

    ax.set_ylabel('Hours',fontweight='bold')
    ax.grid(True)

    plt.show()

# Read data from csv file
def readDataFromFile(dataFile):
    """Read a CSV file and return all of its rows.

    Args:
        dataFile: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV row (header row included), each entry
        being the list of that row's string fields.
    """
    # The context manager closes the file even if parsing raises
    with open(dataFile,'rt') as f:
        return list(csv.reader(f))

# Class to store time and date data read from file
class sleepInstance(object):
    """One sleep period built from a CSV row.

    Attributes set by the constructor:
        day, month, year: Calendar date of the row, as ints.
        startTime, stopTime: Time of day as a fraction of a day
            (0.0 is midnight, 0.5 is midday).
    """

    # Accepted input layouts, tried in order. The first entry of each tuple
    # is the layout that was originally supported, so existing files parse
    # exactly as before; the second adds 4-digit years / times without seconds.
    DATE_FORMATS = ("%d/%m/%y", "%d/%m/%Y")
    TIME_FORMATS = ("%H:%M:%S", "%H:%M")

    def __init__(self,listOfInputLists):
        """Build from one row of strings: ``[date, start time, stop time]``."""
        self.day = 0
        self.month = 0
        self.year = 0
        self.formatDate(listOfInputLists[0])
        self.startTime = self.formatTime(listOfInputLists[1])
        self.stopTime = self.formatTime(listOfInputLists[2])

    @staticmethod
    def _parseWithFormats(text, formats):
        """Return ``text`` parsed by the first matching strptime format.

        Raises:
            ValueError: If ``text`` matches none of ``formats``.
        """
        for fmt in formats:
            try:
                return dt.datetime.strptime(text, fmt)
            except ValueError:
                continue
        raise ValueError("%r does not match any of the formats: %s"
                         % (text, ", ".join(formats)))

    # Extracts date information variables
    def formatDate(self,unformattedDate):
        """Parse a date string and store its day, month and year on self.

        Raises:
            ValueError: If the string matches none of ``DATE_FORMATS``.
        """
        date = self._parseWithFormats(unformattedDate, self.DATE_FORMATS)
        self.day = date.day
        self.month = date.month
        self.year = date.year

    # Formats time as a decimal fraction of day, for use in graph
    def formatTime(self,unformattedTime):
        """Return a time-of-day string as a fraction of a day.

        Raises:
            ValueError: If the string matches none of ``TIME_FORMATS``.
        """
        timeSinceMidnight = self._parseWithFormats(unformattedTime, self.TIME_FORMATS)
        # strptime fills in 1900-01-01 when the format carries no date part
        midnight = dt.datetime(1900,1,1)
        minutesSinceMidnight = (timeSinceMidnight - midnight).total_seconds() / 60.0
        return minutesSinceMidnight / MINUTES_IN_DAY

# Formats data read from file as a list of sleepInstance objects
def formatDataForPlot(listOfInputLists):
    """Convert parsed CSV rows into a list of sleepInstance objects.

    The first row is treated as the header and skipped. Empty rows, which
    csv.reader produces for blank lines in the file, are skipped too so
    that they do not fail when indexed for their date and time fields.

    Args:
        listOfInputLists: List of rows, each a list of string fields.

    Returns:
        A list with one sleepInstance per non-empty data row.
    """
    return [sleepInstance(row) for row in listOfInputLists[1:] if row]

# Extracts earliest (min) and latest (max) dates from data, for use in setting graph limits
def getMaxAndMinDates(plotDataList):
    """Return the latest and earliest dates found in the data.

    Args:
        plotDataList: Iterable of objects exposing ``year``, ``month`` and
            ``day``.

    Returns:
        A ``(maxDate, minDate)`` tuple of ``datetime.datetime`` values at
        midnight. Note the order: the latest date comes first.

    Raises:
        ValueError: If ``plotDataList`` is empty.
    """
    dateTimeList = [dt.datetime(item.year, item.month, item.day)
                    for item in plotDataList]
    if not dateTimeList:
        # Same exception type max() would raise, with a message that names
        # the actual problem
        raise ValueError("no sleep data: cannot determine the date range")
    return max(dateTimeList), min(dateTimeList)

# Script driver: runs at module level, so it executes on import as well
# Input path is relative to the current working directory
dataFile = 'sleepData.csv'
# Raw CSV rows, header row included
listOfInputLists = readDataFromFile(dataFile)
# One sleepInstance per data row (the header row is dropped here)
plotDataList = formatDataForPlot(listOfInputLists)
# getMaxAndMinDates returns the latest date first, then the earliest
maxDate, minDate = getMaxAndMinDates(plotDataList)
# Opens the chart window
plotData(plotDataList,COLUMN_COLOUR,maxDate,minDate)
Date,Start,Finish
17/03/17,03:15:00,03:55:00
17/03/17,06:20:00,06:35:00
17/03/17,09:00:00,09:40:00
17/03/17,13:10:00,13:35:00
17/03/17,15:45:00,16:30:00
17/03/17,18:45:00,19:25:00
17/03/17,21:15:00,21:35:00
18/03/17,00:30:00,02:00:00
18/03/17,04:50:00,05:05:00
18/03/17,08:20:00,08:40:00
18/03/17,12:30:00,13:10:00
18/03/17,16:30:00,17:00:00
18/03/17,18:45:00,19:00:00
18/03/17,20:30:00,21:00:00
19/03/17,00:00:00,12:00:00
19/03/17,18:00:00,23:59:00
19/03/17,13:00:00,14:00:00
20/03/17,12:00:00,11:00:00
输出:

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import matplotlib.patches as patches
import datetime as dt
import csv
import sys

MINUTES_IN_DAY = 1440.0
COLUMN_COLOUR = 'b'

# Graph data using matplotlib visualization
def plotData(data,columnColour,maxDate,minDate):
    """Draw one column per calendar day with each sleep period shaded.

    The x-axis holds dates and the y-axis covers a single day, midnight at
    the bottom to midnight at the top. Periods are shaded with ``axvspan``,
    whose ``ymin``/``ymax`` are fractions of the axes height, so the
    y-limits are pinned to exactly one day to make those fractions line up
    with the hour ticks.

    Args:
        data: Iterable of objects exposing ``year``, ``month``, ``day`` and
            ``startTime``/``stopTime`` given as fractions of a day.
        columnColour: Face colour used for every shaded span.
        maxDate: Latest date in ``data`` (``datetime.datetime``).
        minDate: Earliest date in ``data`` (``datetime.datetime``).
    """
    oneDay = dt.timedelta(days=1)
    halfDay = dt.timedelta(days=0.5)
    hourLabels = ['Midnight','1am','2am','3am','4am','5am','6am','7am','8am',
                  '9am','10am','11am','Midday','1pm','2pm','3pm','4pm','5pm',
                  '6pm','7pm','8pm','9pm','10pm','11pm','Midnight']

    # Invisible background series with one point per day; it only serves to
    # turn the x-axis into a date axis spanning the data
    x = mpl.dates.drange(minDate, maxDate, oneDay)
    if len(x) == 0:
        # drange excludes its end point, so it is empty when every entry
        # falls on the same date
        x = np.array([mpl.dates.date2num(minDate)])

    # Whole-day number of the first date, used as an arbitrary base date so
    # that the y-axis starts at midnight
    base = int(x[0])
    times = x % 1 + base

    fig = plt.figure()
    fig.suptitle('Daily Sleep Patterns', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(111)

    # plot_date is deprecated; xaxis_date() followed by plot() is what it did
    ax.xaxis_date()
    ax.plot(x, times, 'o', color='w', visible=False)

    ax.yaxis_date()
    fig.autofmt_xdate()

    # Pin the y-axis to one full day and place exactly one tick per label,
    # so the tick count always matches the 25 hour labels
    ax.set_ylim(base, base + 1)
    ax.yaxis.set_ticks(base + np.arange(len(hourLabels)) / 24.0)
    ax.set_yticklabels(hourLabels)

    for item in data:
        # Each column is one day wide and centred on midnight of its date
        columnStart = dt.datetime(item.year, item.month, item.day) - halfDay
        columnEnd = columnStart + oneDay

        if item.startTime > item.stopTime:
            # Period runs past midnight: split it, shading from the start
            # time to the top of this column and from the bottom of the
            # next column up to the stop time
            ax.axvspan(xmin=columnStart, xmax=columnEnd, ymin=item.startTime, ymax=1, facecolor=columnColour, alpha=0.5)
            ax.axvspan(xmin=columnEnd, xmax=columnEnd + oneDay, ymin=0, ymax=item.stopTime, facecolor=columnColour, alpha=0.5)
        else:
            ax.axvspan(xmin=columnStart, xmax=columnEnd, ymin=item.startTime, ymax=item.stopTime, facecolor=columnColour, alpha=0.5)

    ax.set_ylabel('Hours',fontweight='bold')
    ax.grid(True)

    plt.show()

# Read data from csv file
def readDataFromFile(dataFile):
    """Read a CSV file and return all of its rows.

    Args:
        dataFile: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV row (header row included), each entry
        being the list of that row's string fields.
    """
    # The context manager closes the file even if parsing raises
    with open(dataFile,'rt') as f:
        return list(csv.reader(f))

# Class to store time and date data read from file
class sleepInstance(object):
    """One sleep period built from a CSV row.

    Attributes set by the constructor:
        day, month, year: Calendar date of the row, as ints.
        startTime, stopTime: Time of day as a fraction of a day
            (0.0 is midnight, 0.5 is midday).
    """

    # Accepted input layouts, tried in order. The first entry of each tuple
    # is the layout that was originally supported, so existing files parse
    # exactly as before; the second adds 4-digit years / times without seconds.
    DATE_FORMATS = ("%d/%m/%y", "%d/%m/%Y")
    TIME_FORMATS = ("%H:%M:%S", "%H:%M")

    def __init__(self,listOfInputLists):
        """Build from one row of strings: ``[date, start time, stop time]``."""
        self.day = 0
        self.month = 0
        self.year = 0
        self.formatDate(listOfInputLists[0])
        self.startTime = self.formatTime(listOfInputLists[1])
        self.stopTime = self.formatTime(listOfInputLists[2])

    @staticmethod
    def _parseWithFormats(text, formats):
        """Return ``text`` parsed by the first matching strptime format.

        Raises:
            ValueError: If ``text`` matches none of ``formats``.
        """
        for fmt in formats:
            try:
                return dt.datetime.strptime(text, fmt)
            except ValueError:
                continue
        raise ValueError("%r does not match any of the formats: %s"
                         % (text, ", ".join(formats)))

    # Extracts date information variables
    def formatDate(self,unformattedDate):
        """Parse a date string and store its day, month and year on self.

        Raises:
            ValueError: If the string matches none of ``DATE_FORMATS``.
        """
        date = self._parseWithFormats(unformattedDate, self.DATE_FORMATS)
        self.day = date.day
        self.month = date.month
        self.year = date.year

    # Formats time as a decimal fraction of day, for use in graph
    def formatTime(self,unformattedTime):
        """Return a time-of-day string as a fraction of a day.

        Raises:
            ValueError: If the string matches none of ``TIME_FORMATS``.
        """
        timeSinceMidnight = self._parseWithFormats(unformattedTime, self.TIME_FORMATS)
        # strptime fills in 1900-01-01 when the format carries no date part
        midnight = dt.datetime(1900,1,1)
        minutesSinceMidnight = (timeSinceMidnight - midnight).total_seconds() / 60.0
        return minutesSinceMidnight / MINUTES_IN_DAY

# Formats data read from file as a list of sleepInstance objects
def formatDataForPlot(listOfInputLists):
    """Convert parsed CSV rows into a list of sleepInstance objects.

    The first row is treated as the header and skipped. Empty rows, which
    csv.reader produces for blank lines in the file, are skipped too so
    that they do not fail when indexed for their date and time fields.

    Args:
        listOfInputLists: List of rows, each a list of string fields.

    Returns:
        A list with one sleepInstance per non-empty data row.
    """
    return [sleepInstance(row) for row in listOfInputLists[1:] if row]

# Extracts earliest (min) and latest (max) dates from data, for use in setting graph limits
def getMaxAndMinDates(plotDataList):
    """Return the latest and earliest dates found in the data.

    Args:
        plotDataList: Iterable of objects exposing ``year``, ``month`` and
            ``day``.

    Returns:
        A ``(maxDate, minDate)`` tuple of ``datetime.datetime`` values at
        midnight. Note the order: the latest date comes first.

    Raises:
        ValueError: If ``plotDataList`` is empty.
    """
    dateTimeList = [dt.datetime(item.year, item.month, item.day)
                    for item in plotDataList]
    if not dateTimeList:
        # Same exception type max() would raise, with a message that names
        # the actual problem
        raise ValueError("no sleep data: cannot determine the date range")
    return max(dateTimeList), min(dateTimeList)

# Script driver: runs at module level, so it executes on import as well
# Input path is relative to the current working directory
dataFile = 'sleepData.csv'
# Raw CSV rows, header row included
listOfInputLists = readDataFromFile(dataFile)
# One sleepInstance per data row (the header row is dropped here)
plotDataList = formatDataForPlot(listOfInputLists)
# getMaxAndMinDates returns the latest date first, then the earliest
maxDate, minDate = getMaxAndMinDates(plotDataList)
# Opens the chart window
plotData(plotDataList,COLUMN_COLOUR,maxDate,minDate)
Date,Start,Finish
17/03/17,03:15:00,03:55:00
17/03/17,06:20:00,06:35:00
17/03/17,09:00:00,09:40:00
17/03/17,13:10:00,13:35:00
17/03/17,15:45:00,16:30:00
17/03/17,18:45:00,19:25:00
17/03/17,21:15:00,21:35:00
18/03/17,00:30:00,02:00:00
18/03/17,04:50:00,05:05:00
18/03/17,08:20:00,08:40:00
18/03/17,12:30:00,13:10:00
18/03/17,16:30:00,17:00:00
18/03/17,18:45:00,19:00:00
18/03/17,20:30:00,21:00:00
19/03/17,00:00:00,12:00:00
19/03/17,18:00:00,23:59:00
19/03/17,13:00:00,14:00:00
20/03/17,12:00:00,11:00:00


当我有时间的时候,可能会想一想:

快速谷歌搜索指向。看起来你可能需要看看这个部分。谢谢。plot.ly看起来很棒。应该有我想要的,虽然我不确定我能在散点图部分找到它。谢谢你的建议,尽管我想我会先给plot.ly一个机会,因为它看起来更漂亮,文档记录也更好。