Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/346.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 赋值这两行需要再缩进一级;我会解决它们的。@Martijn Pieters,很高兴看到它在运行,想知道有没有什么快速的方法可以添加文件夹,文件名层次结构作为前缀,这样就可以很容易地找到任务的来源。示例[任务][Do1][Do2][DL.txt][Task]_Python_Python 3.x_Random Sample - Fatal编程技术网

Python 赋值这两行需要再缩进一级;我会解决它们的。@Martijn Pieters,很高兴看到它在运行,想知道有没有什么快速的方法可以添加文件夹,文件名层次结构作为前缀,这样就可以很容易地找到任务的来源。示例[任务][Do1][Do2][DL.txt][Task]

Python 赋值这两行需要再缩进一级;我会解决它们的。@Martijn Pieters,很高兴看到它在运行,想知道有没有什么快速的方法可以添加文件夹,文件名层次结构作为前缀,这样就可以很容易地找到任务的来源。示例[任务][Do1][Do2][DL.txt][Task],python,python-3.x,random-sample,Python,Python 3.x,Random Sample,赋值这两行需要再缩进一级;我会解决它们的。@Martijn Pieters,很高兴看到它在运行,想知道有没有什么快速的方法可以添加文件夹,文件名层次结构作为前缀,这样就可以很容易地找到任务的来源。示例[任务][Do1][Do2][DL.txt][Task];[Task][Do3][Do5][DL20.txt][Task]还添加了语句“print(random_tasks)”,但输出显示为一个段落,有点不可读。您可以写:(单词在句子中的字母在单词中的字母在单词中如果好(字母))而不是链Hmm,你说


赋值这两行需要再缩进一级;我会解决它们的。@Martijn Pieters,很高兴看到它在运行,想知道有没有什么快速的方法可以添加文件夹,文件名层次结构作为前缀,这样就可以很容易地找到任务的来源。示例[任务][Do1][Do2][DL.txt][Task];[Task][Do3][Do5][DL20.txt][Task]还添加了语句“print(random_tasks)”,但输出显示为一个段落,有点不可读。您可以写:
(letter for word in sentence for letter in word if good(letter))
而不是
Hmm,你说得对。我想我是从
链开始使用的。从我尝试不同的东西时开始,它在我最终发布的版本中是不必要的。直接生成器表达式更清晰,所以我会尝试使用它(我想它也会为我节省一行代码,因为我不需要单独剥离这些行)。您还可以在
task_pipeline()
中编写显式for循环和
yield line
。它应该生成最可读的版本。此外,在这种情况下,使用
with open(filename) as file:
是很自然的
(如果树包含大量txt文件,则需要这样做,以避免“打开的文件太多”错误)@MartijnPieters:如果前面一行中的r
检查,您将丢失
。这表示在获得前n个值后发生替换的概率降低。您是对的,算法可能返回小于n个值,但只有在项目中的值小于n时才会发生rable(它将以随机顺序返回它们)。对,事实上,
r
将阻止索引器,我错过了这一点。:-P收回了这两条评论。
#!/usr/bin/python  
import random   
# Read every line of the task file, then pick 10 distinct lines at random.
with open('C:\\Tasks\\file.txt') as f:
    all_lines = f.readlines()
lines = random.sample(all_lines, 10)
print(lines)
import os

# Directory tree that is searched for .txt task files.
root_path = r'C:\Tasks\\'
# Maps the global index of a file's first line to that file's path.
file_indices = {}
# Running total of lines counted across all task files.
total_lines = 0

# Based on https://stackoverflow.com/q/845058, bufcount function
def linecount(filename, buf_size=1024*1024):
    """Return the number of lines in the text file *filename*.

    The file is read in chunks of ``buf_size`` characters so that large
    files are never loaded into memory at once.

    A final line that is not terminated by a newline is counted as well;
    counting only newline characters would under-report such files by one
    and make their last line unreachable for callers that index lines by
    number.

    Args:
        filename: Path of the file to inspect.
        buf_size: Number of characters to read per chunk.

    Returns:
        The line count as an int; 0 for an empty file.
    """
    newlines = 0
    last_char = ''
    with open(filename) as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for buf in iter(lambda: f.read(buf_size), ''):
            newlines += buf.count('\n')
            last_char = buf[-1]
    # Non-empty file whose last character is not a newline: there is one
    # more (unterminated) line after the last counted newline.
    if last_char and last_char != '\n':
        newlines += 1
    return newlines

# Walk the tree and remember, for every .txt file, the global line index
# at which that file starts; total_lines grows by each file's line count.
for dirpath, dirnames, filenames in os.walk(root_path):
    for filename in filenames:
        if filename.endswith('.txt'):
            path = os.path.join(dirpath, filename)
            file_indices[total_lines] = path
            total_lines += linecount(path)

# Sorted starting offsets, suitable for binary search.
offsets = sorted(file_indices)
import random
import bisect

# Every task is identified by its global line number across all files.
tasks = list(range(total_lines))
# Pick 10 distinct global line numbers.
task_indices = random.sample(tasks, 10)

for index in task_indices:
    # The owning file is the one with the largest starting offset <= index.
    file_index = offsets[bisect.bisect_right(offsets, index) - 1]
    path = file_indices[file_index]
    curr_line = file_index
    with open(path) as f:
        # Read forward from the file's first line up to the wanted line.
        while not curr_line > index:
            task = f.readline()
            curr_line += 1
    print(task)
    # Drop the line number from the pool of remaining tasks.
    tasks.remove(index)
import os
import random

def file_iterator(top_dir):
    """Return the paths of all ``.txt`` files found under *top_dir*.

    The directory tree is traversed with ``os.walk``; each path is the
    walked directory joined with the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(top_dir)
        for name in names
        if name.endswith('.txt')
    ]


def random_lines(files, number=10):
    """Pick *number* random lines, each from a randomly chosen file.

    For every pick a file is chosen uniformly from *files* and then a line
    is chosen uniformly from that file, so the same line may be returned
    more than once. Lines are returned as read, including their trailing
    newline.

    Files that turn out to be empty are dropped from the candidates
    instead of aborting the whole selection.

    Args:
        files: Sequence of file paths to draw from; it is not modified.
        number: How many lines to select.

    Returns:
        A list with *number* lines.

    Raises:
        IndexError: If lines are requested but no non-empty file is
            available to choose from.
    """
    candidates = list(files)  # private copy so the caller's list stays intact
    selected_tasks = []

    while len(selected_tasks) < number:
        if not candidates:
            raise IndexError("No non-empty task files to choose from.")
        path = random.choice(candidates)
        with open(path) as tasks:
            lines = tasks.readlines()
        if not lines:
            # An empty file can never contribute a line; stop considering it.
            candidates.remove(path)
            continue
        selected_tasks.append(random.choice(lines))
    return selected_tasks


## Usage
# Collect every task file once, then draw the default number of random lines.
files = file_iterator(top_dir=r'C:\\Tasks')
random_tasks = random_lines(files=files)
import random

def random_sample(n, items):
    """Return *n* randomly chosen elements of the iterable *items*.

    *items* is consumed in a single pass and never materialised, so it may
    be a generator. The first *n* elements are inserted at random
    positions; each later element replaces a random kept element only when
    its random slot falls inside the first *n* positions.

    Raises:
        ValueError: If *items* yields fewer than *n* elements.
    """
    reservoir = []

    for position, item in enumerate(items):
        slot = random.randint(0, position)
        if slot >= n:
            # Slot falls outside the reservoir: this element is not kept.
            continue
        if position < n:
            # Still filling up: place the element at a random position.
            reservoir.insert(slot, item)
        else:
            # Reservoir is full: overwrite a randomly chosen kept element.
            reservoir[slot] = item

    if len(reservoir) < n:
        raise ValueError("Sample larger than population.")

    return reservoir
import os

def lines_generator(base_folder, exclude = None):
    """Yield the stripped lines of every ``.txt`` file under *base_folder*.

    Files are opened one at a time while walking the tree. Each line has
    its surrounding whitespace removed before it is yielded. When *exclude*
    is given, stripped lines contained in it are skipped.
    """
    for folder, _subdirs, names in os.walk(base_folder):
        for name in names:
            if not name.endswith(".txt"):
                continue
            with open(os.path.join(folder, name)) as handle:
                for raw in handle:
                    text = raw.strip()
                    if exclude is not None and text in exclude:
                        continue
                    yield text
# Lines already handed out by earlier get_sample() calls; they are excluded
# from later samples so no task is returned twice.
_seen = set()

def get_sample(n, count = None, base_folder = r"C:\Tasks"):
    """Draw random, not previously returned lines from the task files.

    All requested lines are drawn in one pass over the files, even when
    several groups are requested. Every returned line is remembered in the
    module-level ``_seen`` set and excluded from subsequent calls.

    Args:
        n: Number of lines per sample.
        count: ``None`` to get one flat list of *n* lines; otherwise the
            number of samples to draw, returned as a list of *count*
            lists with *n* lines each.
        base_folder: Root directory searched for ``.txt`` files.

    Returns:
        A list of lines, or a list of such lists when *count* is given.

    Raises:
        ValueError: Propagated from ``random_sample`` when fewer unseen
            lines are available than requested.
    """
    groups = 1 if count is None else count
    sample = random_sample(groups * n, lines_generator(base_folder, _seen))
    # Only updated after sampling succeeded, so a failed call marks nothing.
    _seen.update(sample)
    if count is None:
        return sample
    # Cut the single big sample into `count` consecutive groups of n lines.
    return [sample[i * n:(i + 1) * n] for i in range(count)]
def main():
    """Demonstrate get_sample() with a single sample and with grouped samples."""
    # One sample of 10 lines, printed one per line below a heading.
    s1 = get_sample(10)
    print("Sample1:", *s1, sep="\n")

    s2, s3 = get_sample(10,2) # get two samples with only one read of the files
    print("\nSample2:", *s2, sep="\n")
    print("\nSample3:", *s3, sep="\n")

    # random_sample() raises ValueError when fewer than 5000 unseen lines
    # are left in the task files.
    s4 = get_sample(5000) # this will probably raise a ValueError!