将hadoop流与python合并器一起使用时失败
我尝试使用python提供的hadoop流计算输入键的平均值。以下是映射器、合路器和减速器的代码:将hadoop流与python合并器一起使用时失败,python,hadoop,streaming,Python,Hadoop,Streaming,我尝试使用python提供的hadoop流计算输入键的平均值。以下是映射器、合路器和减速器的代码: #mapper: import sys def map(argv): line = sys.stdin.readline() try: while line: word, num = line.split() num = int(num) print word+'\t'+str(num)
#mapper:
import sys
def map(argv):
line = sys.stdin.readline()
try:
while line:
word, num = line.split()
num = int(num)
print word+'\t'+str(num)
line = sys.stdin.readline()
except Exception, ex:
print 'mapper ex:'+str(ex)
return None
if __name__ == "__main__":
map(sys.argv)
#combiner
import sys
def combine(argv):
line = sys.stdin.readline()
cur_word = ''
cur_num = 0
cur_times = 0
try:
while line:
word, num = line.split('\t')
if word != cur_word:
if cur_word != '':
print cur_word+'\t'+str(cur_num)+'\t'+str(cur_times)
cur_word = word
cur_num = 0
cur_times = 0
cur_num += int(num)
cur_times += 1
line = sys.stdin.readline()
print cur_word+'\t'+str(cur_num)+'\t'+str(cur_times)
except Exception, ex:
print 'except:{0}'.format(ex)
return None
if __name__ == "__main__":
combine(sys.argv)
#reducer
import sys
def reduce(argv):
line = sys.stdin.readline()
cur_word = ''
cur_num = 0
cur_times = 0
try:
while line:
word, num, times = line.split('\t')
if word != cur_word:
if cur_word != '':
if cur_times != 0:
avr = cur_num / cur_times
print cur_word+'\t'+str(cur_num)+'\t'+str(cur_times)+'\t'+str(avr)
else:
print cur_word+'\t'+str(cur_num)+'\t'+str(cur_times)+'\t'+'0'
cur_word = word
cur_num = 0
cur_times = 0
cur_num += int(num)
cur_times += int(times)
line = sys.stdin.readline()
if cur_times != 0:
avr = cur_num / cur_times
print cur_word+'\t'+str(cur_num)+'\t'+str(cur_times)+'\t'+str(avr)
else:
print cur_word+'\t'+str(cur_num)+'\t'+str(cur_times)+'\t'+'0'
except Exception, ex:
print 'except:{0}'.format(ex)
return None
if __name__ == "__main__":
reduce(sys.argv)
这似乎是一个简单的映射合并还原过程,不是吗?但reduce每次都失败了。
但是,如果我不使用combiner,而是使用combiner.py作为减速机,那么它可以工作
有人愿意帮忙吗,非常感谢。首先,您的合路器应该输入/输出相同格式的数据-您正在接受
word,num
对,并发出word,num,times
三重。这可能是您的问题-请记住,组合器可以在单个mapperThanks的输出上运行零次、一次或多次!我修改映射器,使其生成的结果为单词、num、times
,与组合器的输入相同。现在可以了。@084您好,您能在这里发布您的最终脚本吗?并标记为正确答案?我被困在同一个场景中!谢谢你,伙计!