Python 将一些参数从commadn行直接传递到matplotlib

Python 将一些参数从commadn行直接传递到matplotlib,python,python-3.x,matplotlib,argparse,keyword-argument,Python,Python 3.x,Matplotlib,Argparse,Keyword Argument,我有一个Python脚本,它接受一个输入csv和一组命令行参数,并使用matplotlib生成一个绘图作为输出 许多命令行参数在脚本中被显式处理,例如,提取输入的特定列、应用抖动等。这些都可以很好地工作 但是,许多参数基本上直接传递到最终的plot命令。例如,在命令行上指定--marker o只会导致marker='o'被传递到最终的绘图命令 要处理这个问题,我有如下方法: kwargs=[] 如果(参数标记): kwargs['marker']=args.marker #其他plot关键字参数

我有一个Python脚本,它接受一个输入csv和一组命令行参数,并使用matplotlib生成一个绘图作为输出

许多命令行参数在脚本中被显式处理,例如,提取输入的特定列、应用抖动等。这些都可以很好地工作

但是,许多参数基本上直接传递到最终的plot命令。例如,在命令行上指定
--marker o
只会导致
marker='o'
被传递到最终的
绘图
命令

要处理这个问题,我有如下方法:

kwargs=[]
如果(参数标记):
kwargs['marker']=args.marker
#其他plot关键字参数的更多类似行
df.地块(…,**kwargs)
如何删除每个此类参数的
if(args.foo):…
样板文件

以下是完整的脚本:

#!/usr/bin/env python3

import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import pandas as pd
import numpy as np
import csv
import argparse
import sys
import collections
import os
import json

# for arguments that should be comma-separate lists, we use splitlsit as the type
splitlist = lambda x: x.split(',')

p = argparse.ArgumentParser(usage='plot output from PLOT=1 ./bench')

# input and output file configuration
p.add_argument('input', help='CSV file to plot (or stdin)', nargs='*',
    type=argparse.FileType('r'), default=[ sys.stdin ])
p.add_argument('--out', help='output filename')

# input parsing configuration
p.add_argument('--sep', help='separator character (or regex) for input', default=',')

# column selection and configuration
p.add_argument('--xcol', help='Column index to use as x axis (default: 0)', type=int, default=0)
p.add_argument('--cols-by-name', help='Use only these comma-separated columns, specified by "name", i.e., the column header (default: all columns)',
    type=splitlist)
p.add_argument('--allxticks', help="Force one x-axis tick for each value, disables auto ticks and may crowd x-axis", action='store_true')
p.add_argument('--cols',  help='Use only these zero-based columns on primary axis (default: all columns)',
    type=int, nargs='+')
p.add_argument('--cols2', help='Use only these zero-based columns on secondary axis (default: no secondary axis)',
    type=int, nargs='+')
p.add_argument('--color-map', help='A JSON map from column name to color to use for that column',
    type=json.loads)

# chart labels and text
p.add_argument('--clabels', help="Comma separated list of column names used as label for data series (default: column header)",
    type=splitlist)
p.add_argument('--scatter', help='Do an XY scatter plot (default is a line splot with x values used only as labels)', action='store_true')
p.add_argument('--title', help='Set chart title', default='Some chart (use --title to specify title)')
p.add_argument('--xlabel', help='Set x axis label')
p.add_argument('--ylabel', help='Set y axis label')
p.add_argument('--suffix-names', help='Suffix each column name with the file it came from', action='store_true')

# data manipulation
p.add_argument('--jitter', help='Apply horizontal (x-axis) jitter of the given relative amount (default 0.1)',
    nargs='?', type=float, const=0.1)
p.add_argument('--group', help='Group data by the first column, with new min/median/max columns with one row per group')

# axis and line/point configuration
p.add_argument('--ylim', help='Set the y axis limits explicitly (e.g., to cross at zero)', type=float, nargs='+')
p.add_argument('--xrotate', help='rotate the xlablels by this amount', default=0)
p.add_argument('--tick-interval', help='use the given x-axis tick spacing (in x axis units)', type=int)
p.add_argument('--marker', help='use the given marker', type=str)
p.add_argument('--markersize', help='use the given marker', type=float)
p.add_argument('--linewidth', help='use the given line width', type=float)


# debugging
p.add_argument('--verbose', '-v', help='enable verbose logging', action='store_true')
args = p.parse_args()

vprint = print if args.verbose else lambda *a: None
vprint("args = ", args)

# if we are reading from stdin and stdin is a tty, print a warning since maybe the user just messed up
# the arguments and otherwise we just appear to hang
if (args.input and args.input[0] == sys.stdin):
    print("reading from standard input...", file=sys.stderr)

xi = args.xcol
dfs = []
for f in args.input:
    df = pd.read_csv(f, sep=args.sep)
    if args.suffix_names:
        df = df.add_suffix(' ' + os.path.basename(f.name))
    vprint("----- df from: ", f.name, "-----\n", df.head(), "\n---------------------")
    dfs.append(df)

df = pd.concat(dfs, axis=1)
vprint("----- merged df -----\n", df.head(), "\n---------------------")

# renames duplicate columns by suffixing _1, _2 etc
class renamer():
    def __init__(self):
        self.d = dict()

    def __call__(self, x):
        if x not in self.d:
            self.d[x] = 0
            return x
        else:
            self.d[x] += 1
            return "%s_%d" % (x, self.d[x])


# rename any duplicate columns because otherwise Pandas gets mad
df = df.rename(columns=renamer())

vprint("---- renamed df ----\n", df.head(), "\n---------------------")

def col_names_to_indices(requested, df):
    vprint("requested columns: ", requested)
    colnames = [x.strip() for x in df.columns.tolist()]
    vprint("actual columns: ", colnames)
    cols = []
    for name in requested:
        if not name in colnames:
            exit("column name " + name + " not found, input columns were: " + ','.join(colnames))
        cols.append(colnames.index(name))
    return cols


def extract_cols(cols, df, name):
    vprint(name, "axis columns: ", cols)
    if (not cols): return None
    if (max(cols) >= len(df.columns)):
        print("Column", max(cols), "too large: input only has", len(df.columns), "columns", file=sys.stderr)
        exit(1)
    # ensure xi is the first thing in the column list
    if xi in cols: cols.remove(xi)
    cols = [xi] + cols
    vprint(name, " final columns: ", cols)
    pruned = df.iloc[:, cols]
    vprint("----- pruned ", name, " df -----\n", pruned.head(), "\n---------------------")
    return pruned


if args.cols_by_name:
    cols = col_names_to_indices(args.cols_by_name, df)
elif args.cols:
    cols = args.cols
else:
    cols = list(range(len(df.columns)))

df = extract_cols(cols, df, "primary")
df2 = extract_cols(args.cols2, df, "secondary")

if args.clabels:
    if len(df.columns) != len(args.clabels):
        sys.exit("ERROR: number of column labels " + str(len(args.clabels)) +
                " not equal to the number of selected columns " + str(len(df.columns)))
    df.columns = args.clabels

# dupes will break pandas beyond this point, should be impossible due to above renaming
dupes = df.columns.duplicated()
if True in dupes:
    print("Duplicate columns after merge and pruning, consider --suffix-names",
        df.columns[dupes].values.tolist(), file=sys.stderr)
    exit(1)

# do grouping (feature not complete)
if (args.group):
    vprint("before grouping\n", df)

    dfg = df.groupby(by=df.columns[0])

    df = dfg.agg([min, pd.DataFrame.median, max])

    vprint("agg\n---------------\n", df)

    df.columns = [tup[0] + ' (' + tup[1] + ')' for tup in df.columns.values]
    df.reset_index(inplace=True)

    vprint("flat\n---------------\n", df)

def jitter(arr, multiplier):
    stdev = multiplier*(max(arr)-min(arr))/len(arr)
    return arr if not len(arr) else arr + np.random.randn(len(arr)) * stdev

if args.jitter:
    df.iloc[:,xi] = jitter(df.iloc[:,xi], args.jitter)

kwargs = {}

if (args.linewidth):
    kwargs["linewidth"] = args.linewidth

if args.color_map:
    colors = []
    for i, cname in enumerate(df.columns):
        if i == xi:
            continue
        if cname in args.color_map:
            vprint("Using color {} for column {}".format(args.color_map[cname], cname))
            colors.append(args.color_map[cname])
        else:
            print("WARNING no entry for column {} in given color-map".format(cname))
    vprint("colors = ", colors)
    kwargs["color"] = colors

if (args.scatter):
    kwargs['linestyle'] = 'none'
    kwargs['marker'] = args.marker if args.marker else '.'
elif (args.marker):
    kwargs['marker'] = args.marker

# set x labels to strings so we don't get a scatter plot, and
# so the x labels are not themselves plotted
#if (args.scatter):
#    ax = df.plot.line(x=0, title=args.title, figsize=(12,8), grid=True, **kwargs)
#else:
    # df.iloc[:,xi] = df.iloc[:,xi].apply(str)
ax = df.plot.line(x=0, title=args.title, figsize=(12,8), grid=True, **kwargs)

# this sets the ticks explicitly to one per x value, which means that
# all x values will be shown, but the x-axis could be crowded if there
# are too many
if args.allxticks:
    ticks = df.iloc[:,xi].values
    plt.xticks(ticks=range(len(ticks)), labels=ticks)

if (args.tick_interval):
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=args.tick_interval))

if (args.xrotate):
    plt.xticks(rotation=args.xrotate)

if args.ylabel:
    ax.set_ylabel(args.ylabel)

if args.xlabel:
    ax.set_xlabel(args.xlabel)

if args.ylim:
    if (len(args.ylim) == 1):
        ax.set_ylim(args.ylim[0])
    elif (len(args.ylim) == 2):
        ax.set_ylim(args.ylim[0], args.ylim[1])
    else:
        sys.exit('provide one or two args to --ylim')

# secondary axis handling
if df2 is not None:
    df2.plot(x=0, secondary_y=True, ax=ax, grid=True)

if (args.out):
    vprint("Saving figure to ", args.out, "...")
    plt.savefig(args.out)
else:
    vprint("Showing interactive plot...")
    plt.show()
```

vars(args)
是一本字典。您可以将其部分复制,并将其用作
kwargs
,或者使用它来
更新另一本词典。dict理解可能是复制一组选定键的好方法。
argparse.py
代码本身可能会给您一些想法。它的许多方法都采用
**kwargs
参数。然后,他们使用
pop
删除所选关键点以供特殊使用,然后将其余的关键点传递给其他方法。