
用Python标记ISCX僵尸网络数据集2014,python,dataset,labeling,botnet,Python,Dataset,Labeling,Botnet,我在使用他们网站上提供的CIC僵尸网络数据集的恶意IP信息时遇到了问题。网站上的说明如下: 训练数据集中僵尸网络类型的分布 僵尸网络名称|类型|占数据集中流的比例 Neris | IRC | 25967 (5.67%) Rbot | IRC | 83 (0.018%) Menti | IRC | 2878 (0.62%) Sogou | HTTP | 89 (0.019%) Murlo | IRC | 4881 (1.06%) Virut | HTTP | 58576 (12.80%) NSIS | P2




Neris | IRC | 25967 (5.67%)
Rbot | IRC | 83 (0.018%)
Menti | IRC | 2878 (0.62%)
Sogou | HTTP | 89 (0.019%)
Murlo | IRC | 4881 (1.06%)
Virut | HTTP | 58576 (12.80%)
NSIS | P2P | 757 (0.165%)
Zeus | P2P | 502 (0.109%)
SMTP Spam | P2P | 21633 (4.72%)
UDP Storm | P2P | 44062 (9.63%)
Tbot | IRC | 1296 (0.283%)
Zero Access | P2P | 1011 (0.221%)
Weasel | P2P | 42313 (9.25%)
Smoke Bot | P2P | 78 (0.017%)
Zeus Control (C&C) | P2P | 31 (0.006%)
ISCX IRC bot | P2P | 1816 (0.387%)
所得数据集被划分为训练数据集和测试数据集,分别包含7种和16种僵尸网络。表1和表2详细列出了每个数据集中僵尸网络的分布和类型。我们的训练数据集大小为5.3 GB,其中43.92%为恶意流,其余为正常流。测试数据集大小为8.5 GB,其中44.97%为恶意流。我们在测试数据集中加入了比训练数据集更多样的僵尸网络流量,以评估特征子集所能提供的新颖性检测能力。 测试数据集中僵尸网络类型的分布


Neris | IRC | 25967 (5.67%)
Rbot | IRC | 83 (0.018%)
Menti | IRC | 2878 (0.62%)
Sogou | HTTP | 89 (0.019%)
Murlo | IRC | 4881 (1.06%)
Virut | HTTP | 58576 (12.80%)
NSIS | P2P | 757 (0.165%)
Zeus | P2P | 502 (0.109%)
SMTP Spam | P2P | 21633 (4.72%)
UDP Storm | P2P | 44062 (9.63%)
Tbot | IRC | 1296 (0.283%)
Zero Access | P2P | 1011 (0.221%)
Weasel | P2P | 42313 (9.25%)
Smoke Bot | P2P | 78 (0.017%)
Zeus Control (C&C) | P2P | 31 (0.006%)
ISCX IRC bot | P2P | 1816 (0.387%)




import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

import numpy as np
import pandas as pd

from scipy import stats

# importing pandas module  
import pandas as pd  

# importing regex module 
import re 

# from tensorflow.keras import backend
from tensorflow.python.keras import backend

import matplotlib.pyplot as plt
%matplotlib inline

import xgboost as xgb

import pickle

import gc

# Load custom functions

import gan

# For reloading after making changes
import importlib
from gan import *

import pandas as pd

import timeit
# code you want to evaluate


# Master switch for this script: a truthy value reloads both raw CSV exports
# and redoes the cleanup below; a falsy value (the current setting) skips it.
begin_from_start = 0

# Only initialised here; nothing in the visible code reads it.
take_chunk = 0


def _drop_non_finite_rows(frame):
    """Return ``frame`` without the rows that contain NaN or +/-inf.

    The total null count is printed before and after the cleanup, and the
    returned frame carries a fresh 0..n-1 index.
    """
    null_total = frame.isnull().sum().sum()
    print("Null Values in data set: " + str(null_total))

    # Turn infinities into NaN first so one dropna() call removes both kinds.
    without_inf = frame.replace([np.inf, -np.inf], np.nan)
    cleaned = without_inf.dropna(how="any").reset_index(drop=True)

    null_total = cleaned.isnull().sum().sum()
    print("Null Values in data set: " + str(null_total))
    return cleaned


if begin_from_start:
    # Flow-level CSV exports of the training and testing captures.
    data = pd.read_csv(r'ISCX_Botnet-Training.pcap_Flow.csv', low_memory=False)
    testing_data = pd.read_csv(r'ISCX_Botnet-Testing.pcap_Flow.csv', low_memory=False)

    # 'botnet' gates the column-dropping and labelling steps further down;
    # 'z_score' is not read anywhere in the visible code.
    botnet = True
    z_score = False

    data = _drop_non_finite_rows(data)
    testing_data = _drop_non_finite_rows(testing_data)


def _true_positions(mask):
    """Return the 0-based positions at which the boolean ``mask`` is truthy."""
    return [pos for pos, hit in enumerate(mask) if hit]


if begin_from_start:

    # Categorical and timestamp columns are not used as features, so they are
    # dropped here. 'Flow ID' is kept because the masks below match against it.
    # (Earlier, now-disabled variants also dropped the TCP flag-count columns
    # or 'Init Fwd Win Byts'.)
    if botnet == True:
        data = data.drop(['Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Timestamp', 'Protocol'], axis=1)

        # Find, for every known malicious host / botnet, the flows whose
        # 'Flow ID' contains that host's pattern.
        df = data[['Flow ID', 'Label']]
        flow_ids = df['Flow ID']

        # NOTE(review): every pattern below is an empty string in this copy of
        # the script. An empty pattern matches every non-null Flow ID, so all
        # masks are identical and select every row -- fill in the real
        # per-botnet IP patterns before trusting the counts or labels.
        # NOTE(review): str.contains() treats the pattern as a regular
        # expression by default, so a literal IP's '.' matches any character;
        # consider regex=False once real patterns are in place -- TODO confirm.
        IRC_1 = flow_ids.str.contains('')
        IRC_2 = flow_ids.str.contains('')
        IRC_3 = flow_ids.str.contains('')
        IRC_4 = flow_ids.str.contains('')
        IRC_5 = flow_ids.str.contains('')
        IRC_6 = flow_ids.str.contains('')
        IRC_7 = flow_ids.str.contains('')
        IRC_8 = flow_ids.str.contains('')
        IRC_9 = flow_ids.str.contains('')
        IRC_10 = flow_ids.str.contains('')
        IRC_11 = flow_ids.str.contains('')
        IRC_12 = flow_ids.str.contains('')
        IRC_13 = flow_ids.str.contains('')
        IRC_14 = flow_ids.str.contains('')
        IRC_15 = flow_ids.str.contains('')

        Neris = flow_ids.str.contains('')
        RBot = flow_ids.str.contains('')
        Menti = flow_ids.str.contains('')
        Sogou = flow_ids.str.contains('')
        Murlo = flow_ids.str.contains('')
        Virut = flow_ids.str.contains('')
        IRCbot_and_black_hole_1 = flow_ids.str.contains('')
        Black_hole_2 = flow_ids.str.contains('')
        Black_hole_3 = flow_ids.str.contains('')
        TBot_1 = flow_ids.str.contains('')
        TBot_2 = flow_ids.str.contains('')
        TBot_3 = flow_ids.str.contains('')
        TBot_4 = flow_ids.str.contains('')
        Weasel_master = flow_ids.str.contains('')
        Weasel_bot = flow_ids.str.contains('')
        Zeus_1 = flow_ids.str.contains('')
        Zeus_2 = flow_ids.str.contains('')
        Zeus_3 = flow_ids.str.contains('')
        bin_Zeus = flow_ids.str.contains('')
        Osx_trojan = flow_ids.str.contains('')
        zero_access_1 = flow_ids.str.contains('')
        zero_access_2 = flow_ids.str.contains('')
        Smoke_bot = flow_ids.str.contains('')

        # Row positions of the matching flows. Each list is computed exactly
        # once (the original recomputed the last three a second time).
        indx_IRC_1 = _true_positions(IRC_1)
        indx_IRC_2 = _true_positions(IRC_2)
        indx_IRC_3 = _true_positions(IRC_3)
        indx_IRC_4 = _true_positions(IRC_4)
        indx_IRC_5 = _true_positions(IRC_5)
        indx_IRC_6 = _true_positions(IRC_6)
        indx_IRC_7 = _true_positions(IRC_7)
        indx_IRC_8 = _true_positions(IRC_8)
        indx_IRC_9 = _true_positions(IRC_9)
        indx_IRC_10 = _true_positions(IRC_10)
        indx_IRC_11 = _true_positions(IRC_11)
        indx_IRC_12 = _true_positions(IRC_12)
        indx_IRC_13 = _true_positions(IRC_13)
        indx_IRC_14 = _true_positions(IRC_14)
        indx_IRC_15 = _true_positions(IRC_15)

        indx_Neris = _true_positions(Neris)
        indx_RBot = _true_positions(RBot)
        indx_Menti = _true_positions(Menti)
        indx_Sogou = _true_positions(Sogou)
        indx_Murlo = _true_positions(Murlo)
        indx_Virut = _true_positions(Virut)
        indx_IRCbot_and_black_hole_1 = _true_positions(IRCbot_and_black_hole_1)
        indx_Black_hole_2 = _true_positions(Black_hole_2)
        indx_Black_hole_3 = _true_positions(Black_hole_3)
        indx_TBot_1 = _true_positions(TBot_1)
        indx_TBot_2 = _true_positions(TBot_2)
        indx_TBot_3 = _true_positions(TBot_3)
        indx_TBot_4 = _true_positions(TBot_4)
        indx_Weasel_master = _true_positions(Weasel_master)
        indx_Weasel_bot = _true_positions(Weasel_bot)
        indx_Zeus_1 = _true_positions(Zeus_1)
        indx_Zeus_2 = _true_positions(Zeus_2)
        indx_Zeus_3 = _true_positions(Zeus_3)
        indx_bin_Zeus = _true_positions(bin_Zeus)
        indx_Osx_trojan = _true_positions(Osx_trojan)
        indx_zero_access_1 = _true_positions(zero_access_1)
        indx_zero_access_2 = _true_positions(zero_access_2)
        indx_Smoke_bot = _true_positions(Smoke_bot)

        total_instances = df.shape[0]
        print("Total Instances:" + str(total_instances))

        # (report label, matching row positions), in print order.
        botnet_report = [
            ("bin_IRC_1", indx_IRC_1),
            ("bin_IRC_2", indx_IRC_2),
            ("bin_IRC_3", indx_IRC_3),
            ("bin_IRC_4", indx_IRC_4),
            ("bin_IRC_5", indx_IRC_5),
            ("bin_IRC_6", indx_IRC_6),
            ("bin_IRC_7", indx_IRC_7),
            ("bin_IRC_8", indx_IRC_8),
            ("bin_IRC_9", indx_IRC_9),
            ("bin_IRC_10", indx_IRC_10),
            ("bin_IRC_11", indx_IRC_11),
            ("bin_IRC_12", indx_IRC_12),
            ("bin_IRC_13", indx_IRC_13),
            ("bin_IRC_14", indx_IRC_14),
            ("bin_IRC_15", indx_IRC_15),
            ("Neris", indx_Neris),
            ("RBot", indx_RBot),
            ("Menti", indx_Menti),
            ("Sogou", indx_Sogou),
            ("Murlo", indx_Murlo),
            ("Virut", indx_Virut),
            ("IRCbot_and_black_hole_1", indx_IRCbot_and_black_hole_1),
            ("Black_hole_2", indx_Black_hole_2),
            ("Black_hole_3", indx_Black_hole_3),
            ("TBot_1", indx_TBot_1),
            ("TBot_2", indx_TBot_2),
            ("TBot_3", indx_TBot_3),
            ("TBot_4", indx_TBot_4),
            ("Weasel_master", indx_Weasel_master),
            ("Weasel_bot", indx_Weasel_bot),
            ("Zeus_1", indx_Zeus_1),
            # Same 4-decimal precision as every other row (was rounded twice).
            ("Zeus_2", indx_Zeus_2),
            ("Zeus_3", indx_Zeus_3),
            # Reports its own index list (previously printed indx_Zeus_3).
            ("bin_Zeus", indx_bin_Zeus),
            ("Osx_trojan", indx_Osx_trojan),
            ("zero_access_1", indx_zero_access_1),
            ("zero_access_2", indx_zero_access_2),
            ("Smoke_bot", indx_Smoke_bot),
        ]

        for report_label, report_rows in botnet_report:
            # Guard against an empty frame so the report never divides by zero.
            if total_instances:
                report_share = round(len(report_rows) / total_instances * 100, 4)
            else:
                report_share = 0.0
            print(report_label + "_Instances:" + str(len(report_rows)) + " ---> " + str(report_share) + " %")

if begin_from_start and botnet == True:

    # Start with every flow marked benign (0.0), then set 'Label' to 1 for the
    # rows that matched one of the selected botnet masks.
    data.loc[:, 'Label'] = 0.0

    # NOTE(review): the indx_* lists hold row positions; using them with .loc
    # presumably relies on the 0..n-1 index created by the earlier
    # reset_index(drop=True) -- confirm if the frame is re-indexed in between.
    for malicious_rows in (
        indx_IRC_2,
        indx_IRC_3,
        indx_IRC_4,
        indx_IRC_5,
        indx_IRC_6,
        indx_IRC_7,
        indx_IRC_11,
        indx_IRC_15,
        indx_Neris,
        indx_RBot,
        indx_Virut,
        indx_Zeus_2,
    ):
        data.loc[malicious_rows, 'Label'] = 1


import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

import numpy as np
import pandas as pd

from scipy import stats

# importing pandas module  
import pandas as pd  

# importing regex module 
import re 

# from tensorflow.keras import backend
from tensorflow.python.keras import backend

import matplotlib.pyplot as plt
%matplotlib inline

import xgboost as xgb

import pickle

import gc

# Load custom functions

import gan

# For reloading after making changes
import importlib
from gan import *

import pandas as pd

import timeit
# code you want to evaluate


# Master switch for this script: a truthy value reloads both raw CSV exports
# and redoes the cleanup below; a falsy value (the current setting) skips it.
begin_from_start = 0

# Only initialised here; nothing in the visible code reads it.
take_chunk = 0


def _drop_non_finite_rows(frame):
    """Return ``frame`` without the rows that contain NaN or +/-inf.

    The total null count is printed before and after the cleanup, and the
    returned frame carries a fresh 0..n-1 index.
    """
    null_total = frame.isnull().sum().sum()
    print("Null Values in data set: " + str(null_total))

    # Turn infinities into NaN first so one dropna() call removes both kinds.
    without_inf = frame.replace([np.inf, -np.inf], np.nan)
    cleaned = without_inf.dropna(how="any").reset_index(drop=True)

    null_total = cleaned.isnull().sum().sum()
    print("Null Values in data set: " + str(null_total))
    return cleaned


if begin_from_start:
    # Flow-level CSV exports of the training and testing captures.
    data = pd.read_csv(r'ISCX_Botnet-Training.pcap_Flow.csv', low_memory=False)
    testing_data = pd.read_csv(r'ISCX_Botnet-Testing.pcap_Flow.csv', low_memory=False)

    # 'botnet' gates the column-dropping and labelling steps further down;
    # 'z_score' is not read anywhere in the visible code.
    botnet = True
    z_score = False

    data = _drop_non_finite_rows(data)
    testing_data = _drop_non_finite_rows(testing_data)


def _true_positions(mask):
    """Return the 0-based positions at which the boolean ``mask`` is truthy."""
    return [pos for pos, hit in enumerate(mask) if hit]


if begin_from_start:

    # Categorical and timestamp columns are not used as features, so they are
    # dropped here. 'Flow ID' is kept because the masks below match against it.
    # (Earlier, now-disabled variants also dropped the TCP flag-count columns
    # or 'Init Fwd Win Byts'.)
    if botnet == True:
        data = data.drop(['Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Timestamp', 'Protocol'], axis=1)

        # Find, for every known malicious host / botnet, the flows whose
        # 'Flow ID' contains that host's pattern.
        df = data[['Flow ID', 'Label']]
        flow_ids = df['Flow ID']

        # NOTE(review): every pattern below is an empty string in this copy of
        # the script. An empty pattern matches every non-null Flow ID, so all
        # masks are identical and select every row -- fill in the real
        # per-botnet IP patterns before trusting the counts or labels.
        # NOTE(review): str.contains() treats the pattern as a regular
        # expression by default, so a literal IP's '.' matches any character;
        # consider regex=False once real patterns are in place -- TODO confirm.
        IRC_1 = flow_ids.str.contains('')
        IRC_2 = flow_ids.str.contains('')
        IRC_3 = flow_ids.str.contains('')
        IRC_4 = flow_ids.str.contains('')
        IRC_5 = flow_ids.str.contains('')
        IRC_6 = flow_ids.str.contains('')
        IRC_7 = flow_ids.str.contains('')
        IRC_8 = flow_ids.str.contains('')
        IRC_9 = flow_ids.str.contains('')
        IRC_10 = flow_ids.str.contains('')
        IRC_11 = flow_ids.str.contains('')
        IRC_12 = flow_ids.str.contains('')
        IRC_13 = flow_ids.str.contains('')
        IRC_14 = flow_ids.str.contains('')
        IRC_15 = flow_ids.str.contains('')

        Neris = flow_ids.str.contains('')
        RBot = flow_ids.str.contains('')
        Menti = flow_ids.str.contains('')
        Sogou = flow_ids.str.contains('')
        Murlo = flow_ids.str.contains('')
        Virut = flow_ids.str.contains('')
        IRCbot_and_black_hole_1 = flow_ids.str.contains('')
        Black_hole_2 = flow_ids.str.contains('')
        Black_hole_3 = flow_ids.str.contains('')
        TBot_1 = flow_ids.str.contains('')
        TBot_2 = flow_ids.str.contains('')
        TBot_3 = flow_ids.str.contains('')
        TBot_4 = flow_ids.str.contains('')
        Weasel_master = flow_ids.str.contains('')
        Weasel_bot = flow_ids.str.contains('')
        Zeus_1 = flow_ids.str.contains('')
        Zeus_2 = flow_ids.str.contains('')
        Zeus_3 = flow_ids.str.contains('')
        bin_Zeus = flow_ids.str.contains('')
        Osx_trojan = flow_ids.str.contains('')
        zero_access_1 = flow_ids.str.contains('')
        zero_access_2 = flow_ids.str.contains('')
        Smoke_bot = flow_ids.str.contains('')

        # Row positions of the matching flows. Each list is computed exactly
        # once (the original recomputed the last three a second time).
        indx_IRC_1 = _true_positions(IRC_1)
        indx_IRC_2 = _true_positions(IRC_2)
        indx_IRC_3 = _true_positions(IRC_3)
        indx_IRC_4 = _true_positions(IRC_4)
        indx_IRC_5 = _true_positions(IRC_5)
        indx_IRC_6 = _true_positions(IRC_6)
        indx_IRC_7 = _true_positions(IRC_7)
        indx_IRC_8 = _true_positions(IRC_8)
        indx_IRC_9 = _true_positions(IRC_9)
        indx_IRC_10 = _true_positions(IRC_10)
        indx_IRC_11 = _true_positions(IRC_11)
        indx_IRC_12 = _true_positions(IRC_12)
        indx_IRC_13 = _true_positions(IRC_13)
        indx_IRC_14 = _true_positions(IRC_14)
        indx_IRC_15 = _true_positions(IRC_15)

        indx_Neris = _true_positions(Neris)
        indx_RBot = _true_positions(RBot)
        indx_Menti = _true_positions(Menti)
        indx_Sogou = _true_positions(Sogou)
        indx_Murlo = _true_positions(Murlo)
        indx_Virut = _true_positions(Virut)
        indx_IRCbot_and_black_hole_1 = _true_positions(IRCbot_and_black_hole_1)
        indx_Black_hole_2 = _true_positions(Black_hole_2)
        indx_Black_hole_3 = _true_positions(Black_hole_3)
        indx_TBot_1 = _true_positions(TBot_1)
        indx_TBot_2 = _true_positions(TBot_2)
        indx_TBot_3 = _true_positions(TBot_3)
        indx_TBot_4 = _true_positions(TBot_4)
        indx_Weasel_master = _true_positions(Weasel_master)
        indx_Weasel_bot = _true_positions(Weasel_bot)
        indx_Zeus_1 = _true_positions(Zeus_1)
        indx_Zeus_2 = _true_positions(Zeus_2)
        indx_Zeus_3 = _true_positions(Zeus_3)
        indx_bin_Zeus = _true_positions(bin_Zeus)
        indx_Osx_trojan = _true_positions(Osx_trojan)
        indx_zero_access_1 = _true_positions(zero_access_1)
        indx_zero_access_2 = _true_positions(zero_access_2)
        indx_Smoke_bot = _true_positions(Smoke_bot)

        total_instances = df.shape[0]
        print("Total Instances:" + str(total_instances))

        # (report label, matching row positions), in print order.
        botnet_report = [
            ("bin_IRC_1", indx_IRC_1),
            ("bin_IRC_2", indx_IRC_2),
            ("bin_IRC_3", indx_IRC_3),
            ("bin_IRC_4", indx_IRC_4),
            ("bin_IRC_5", indx_IRC_5),
            ("bin_IRC_6", indx_IRC_6),
            ("bin_IRC_7", indx_IRC_7),
            ("bin_IRC_8", indx_IRC_8),
            ("bin_IRC_9", indx_IRC_9),
            ("bin_IRC_10", indx_IRC_10),
            ("bin_IRC_11", indx_IRC_11),
            ("bin_IRC_12", indx_IRC_12),
            ("bin_IRC_13", indx_IRC_13),
            ("bin_IRC_14", indx_IRC_14),
            ("bin_IRC_15", indx_IRC_15),
            ("Neris", indx_Neris),
            ("RBot", indx_RBot),
            ("Menti", indx_Menti),
            ("Sogou", indx_Sogou),
            ("Murlo", indx_Murlo),
            ("Virut", indx_Virut),
            ("IRCbot_and_black_hole_1", indx_IRCbot_and_black_hole_1),
            ("Black_hole_2", indx_Black_hole_2),
            ("Black_hole_3", indx_Black_hole_3),
            ("TBot_1", indx_TBot_1),
            ("TBot_2", indx_TBot_2),
            ("TBot_3", indx_TBot_3),
            ("TBot_4", indx_TBot_4),
            ("Weasel_master", indx_Weasel_master),
            ("Weasel_bot", indx_Weasel_bot),
            ("Zeus_1", indx_Zeus_1),
            # Same 4-decimal precision as every other row (was rounded twice).
            ("Zeus_2", indx_Zeus_2),
            ("Zeus_3", indx_Zeus_3),
            # Reports its own index list (previously printed indx_Zeus_3).
            ("bin_Zeus", indx_bin_Zeus),
            ("Osx_trojan", indx_Osx_trojan),
            ("zero_access_1", indx_zero_access_1),
            ("zero_access_2", indx_zero_access_2),
            ("Smoke_bot", indx_Smoke_bot),
        ]

        for report_label, report_rows in botnet_report:
            # Guard against an empty frame so the report never divides by zero.
            if total_instances:
                report_share = round(len(report_rows) / total_instances * 100, 4)
            else:
                report_share = 0.0
            print(report_label + "_Instances:" + str(len(report_rows)) + " ---> " + str(report_share) + " %")

if begin_from_start and botnet == True:

    # Start with every flow marked benign (0.0), then set 'Label' to 1 for the
    # rows that matched one of the selected botnet masks.
    data.loc[:, 'Label'] = 0.0

    # NOTE(review): the indx_* lists hold row positions; using them with .loc
    # presumably relies on the 0..n-1 index created by the earlier
    # reset_index(drop=True) -- confirm if the frame is re-indexed in between.
    for malicious_rows in (
        indx_IRC_2,
        indx_IRC_3,
        indx_IRC_4,
        indx_IRC_5,
        indx_IRC_6,
        indx_IRC_7,
        indx_IRC_11,
        indx_IRC_15,
        indx_Neris,
        indx_RBot,
        indx_Virut,
        indx_Zeus_2,
    ):
        data.loc[malicious_rows, 'Label'] = 1