Python 检查列表中项目的交集

Python 检查列表中项目的交集,python,arrays,list,Python,Arrays,List,假设我有6个列表或数组。每个列表都有任意数量的单词 0 | 1 | 2 | 3 | 4 | 5 | ... N ----------------------------------------------------------- cat dog pine tree light fan cat dog pine tree light fan

假设我有6个列表或数组。每个列表都有任意数量的单词

   0   |    1   |    2   |    3    |    4    |   5    | ... N
-----------------------------------------------------------
  cat     dog      pine    tree       light    fan
  cat     dog      pine    tree       light    fan
  cat     dog      pine    tree       light    fan
  cat     dog      pine    tree       light    fan
  cat     dog      pine    tree       light    fan
我不想把所有这些单词都打出来,但我想说的是,我想得到一个交集。查找所有元素的交集非常简单,可以在python中使用以下函数完成:

all = set(zer0).intersection(one).intersection(two).intersection(...N)
    =========== start data_0 ==============
(0, ((u'the',), 13))
(1, ((u'of',), 10))
(2, ((u'rainforests',), 7))
(3, ((u'and',), 7))
(4, ((u'tropical',), 5))
(5, ((u'to',), 4))
(6, ((u'rainforest',), 4))
(7, ((u'in',), 4))
(8, ((u'are',), 4))
(9, ((u'a',), 4))
(10, ((u'it',), 3))
(11, ((u'by',), 3))
(12, ((u'been',), 3))
(13, ((u's',), 3))
(14, ((u'is',), 3))
(15, ((u'there',), 3))
(16, ((u'have',), 2))
(17, ((u'earth',), 2))
(18, ((u'sometimes',), 2))
(19, ((u'also',), 2))
(20, ((u'oxygen',), 2))
(21, ((u'jungle',), 2))
(22, ((u'rainfall',), 2))
(23, ((u'for',), 2))
(24, ((u'through',), 2))
(25, ((u'called',), 2))
(26, ((u'be',), 2))
(27, ((u'world',), 2))
(28, ((u'species',), 2))
(29, ((u'ground',), 2))
(30, ((u'shrubs',), 1))
(31, ((u'may',), 1))
(32, ((u'biotic',), 1))
(33, ((u'from',), 1))
(34, ((u'respiration',), 1))
(35, ((u'known',), 1))
(36, ((u'largest',), 1))
(37, ((u'discovered',), 1))
(38, ((u'two',), 1))
(39, ((u'plants',), 1))
(40, ((u'conditions',), 1))
(41, ((u'insects',), 1))
(42, ((u'necessary',), 1))
(43, ((u'1',), 1))
(44, ((u'convergence',), 1))
(45, ((u'jewels',), 1))
(46, ((u'poor',), 1))
(47, ((u'estimated',), 1))
(48, ((u'if',), 1))
(49, ((u'creating',), 1))
(50, ((u'that',), 1))
(51, ((u'75',), 1))
(52, ((u'growth',), 1))
(53, ((u'penetration',), 1))
(54, ((u'thinned',), 1))
(55, ((u'has',), 1))
(56, ((u'characterized',), 1))
(57, ((u'plays',), 1))
(58, ((u'temperate',), 1))
(59, ((u'production',), 1))
(60, ((u'because',), 1))
(61, ((u'high',), 1))
(62, ((u'98',), 1))
(63, ((u'trough',), 1))
(64, ((u'centimetres',), 1))
(65, ((u'over',), 1))
(66, ((u'some',), 1))
(67, ((u'undiscovered',), 1))
(68, ((u'natural',), 1))
(69, ((u'still',), 1))
(70, ((u'misnamed',), 1))
(71, ((u'all',), 1))
(72, ((u'many',), 1))
(73, ((u'sunlight',), 1))
(74, ((u'millions',), 1))
(75, ((u'dioxide',), 1))
(76, ((u'around',), 1))
(77, ((u'28',), 1))
(78, ((u'monsoon',), 1))
(79, ((u'canopy',), 1))
(80, ((u'photosynthesis',), 1))
(81, ((u'level',), 1))
(82, ((u'177',), 1))
(83, ((u'trees',), 1))
(84, ((u'carbon',), 1))
(85, ((u'one',), 1))
(86, ((u'4',), 1))
(87, ((u'between',), 1))
(88, ((u'areas',), 1))
(89, ((u'responsible',), 1))
(90, ((u'as',), 1))
(91, ((u'vines',), 1))
(92, ((u'450',), 1))
(93, ((u'turnover',), 1))
(94, ((u'leaf',), 1))
(95, ((u'role',), 1))
(96, ((u'indigenous',), 1))
(97, ((u'can',), 1))
(98, ((u'with',), 1))
(99, ((u'types',), 1))
(100, ((u'alternatively',), 1))
(101, ((u'annual',), 1))
(102, ((u'generally',), 1))
(103, ((u'zone',), 1))
(104, ((u'beneath',), 1))
(105, ((u'significant',), 1))
(106, ((u'consuming',), 1))
(107, ((u'microorganisms',), 1))
(108, ((u'applied',), 1))
(109, ((u'soon',), 1))
(110, ((u'2',), 1))
(111, ((u'tangled',), 1))
(112, ((u'250',), 1))
(113, ((u'restricted',), 1))
(114, ((u'undergrowth',), 1))
(115, ((u'medicines',), 1))
(116, ((u'climatic',), 1))
(117, ((u'colonized',), 1))
(118, ((u'forests',), 1))
(119, ((u'dense',), 1))
(120, ((u'pharmacy',), 1))
(121, ((u'quarter',), 1))
(122, ((u'intertropical',), 1))
(123, ((u'term',), 1))
(124, ((u'or',), 1))
(125, ((u'destroyed',), 1))
(126, ((u'processing',), 1))
(127, ((u'3',), 1))
(128, ((u'small',), 1))
(129, ((u'40',), 1))
    =========== start data_1 ==============
(0, ((u'the',), 15))
(1, ((u'of',), 8))
(2, ((u'in',), 6))
(3, ((u'and',), 6))
(4, ((u'tropical',), 5))
(5, ((u'cm',), 4))
(6, ((u'to',), 3))
(7, ((u'are',), 3))
(8, ((u'rainforests',), 3))
(9, ((u'forests',), 3))
(10, ((u'south',), 2))
(11, ((u'from',), 2))
(12, ((u'it',), 2))
(13, ((u'g',), 2))
(14, ((u'no',), 2))
(15, ((u'known',), 2))
(16, ((u'rainforest',), 2))
(17, ((u'exceed',), 2))
(18, ((u'although',), 2))
(19, ((u'typically',), 2))
(20, ((u'america',), 2))
(21, ((u'e',), 2))
(22, ((u'many',), 2))
(23, ((u's',), 2))
(24, ((u'between',), 2))
(25, ((u'as',), 2))
(26, ((u'is',), 2))
(27, ((u'with',), 2))
(28, ((u'zone',), 2))
(29, ((u'congo',), 2))
(30, ((u'tropic',), 2))
(31, ((u'equatorial',), 1))
(32, ((u'within',), 1))
(33, ((u'located',), 1))
(34, ((u'convergence',), 1))
(35, ((u'now',), 1))
(36, ((u'el',), 1))
(37, ((u'by',), 1))
(38, ((u'saharan',), 1))
(39, ((u'average',), 1))
(40, ((u'lungs',), 1))
(41, ((u'less',), 1))
(42, ((u'64',), 1))
(43, ((u'have',), 1))
(44, ((u'degreef',), 1))
(45, ((u'temperatures',), 1))
(46, ((u'1',), 1))
(47, ((u'africa',), 1))
(48, ((u'earth',), 1))
(49, ((u'200',), 1))
(50, ((u'australia',), 1))
(51, ((u'18',), 1))
(52, ((u'peninsula',), 1))
(53, ((u'indonesia',), 1))
(54, ((u'that',), 1))
(55, ((u'390',), 1))
(56, ((u'been',), 1))
(57, ((u'10',), 1))
(58, ((u'characterized',), 1))
(59, ((u'also',), 1))
(60, ((u'yucatan',), 1))
(61, ((u'6',), 1))
(62, ((u'such',), 1))
(63, ((u'months',), 1))
(64, ((u'000',), 1))
(65, ((u'islands',), 1))
(66, ((u'trough',), 1))
(67, ((u'dry',), 1))
(68, ((u'66',), 1))
(69, ((u'equator',), 1))
(70, ((u'season',), 1))
(71, ((u'mean',), 1))
(72, ((u'sub',), 1))
(73, ((u'oxygen',), 1))
(74, ((u'degrees',), 1))
(75, ((u'7',), 1))
(76, ((u'rainfall',), 1))
(77, ((u'lanka',), 1))
(78, ((u'all',), 1))
(79, ((u'monthly',), 1))
(80, ((u'cancer',), 1))
(81, ((u'monsoon',), 1))
(82, ((u'asia',), 1))
(83, ((u'on',), 1))
(84, ((u'photosynthesis',), 1))
(85, ((u'degreec',), 1))
(86, ((u'southern',), 1))
(87, ((u'location',), 1))
(88, ((u'addition',), 1))
(89, ((u'sri',), 1))
(90, ((u'capricorn',), 1))
(91, ((u'southeast',), 1))
(92, ((u'warm',), 1))
(93, ((u'found',), 1))
(94, ((u'through',), 1))
(95, ((u'cameroon',), 1))
(96, ((u'climate',), 1))
(97, ((u'called',), 1))
(98, ((u'bosawas',), 1))
(99, ((u'pacific',), 1))
(100, ((u'69',), 1))
(101, ((u'5',), 1))
(102, ((u'can',), 1))
(103, ((u'burma',), 1))
(104, ((u'79',), 1))
(105, ((u'papua',), 1))
(106, ((u'annual',), 1))
(107, ((u'lies',), 1))
(108, ((u'atmosphere',), 1))
(109, ((u'substantial',), 1))
(110, ((u'new',), 1))
(111, ((u'168',), 1))
(112, ((u'category',), 1))
(113, ((u'moist',), 1))
(114, ((u'year',), 1))
(115, ((u'little',), 1))
(116, ((u'contribute',), 1))
(117, ((u'during',), 1))
(118, ((u'175',), 1))
(119, ((u'belize',), 1))
(120, ((u'wet',), 1))
(121, ((u'than',), 1))
(122, ((u'guinea',), 1))
(123, ((u'north',), 1))
(124, ((u'philippines',), 1))
(125, ((u'hawai\u02bbi',), 1))
(126, ((u'myanmar',), 1))
(127, ((u'world',), 1))
(128, ((u'peten',), 1))
(129, ((u'exist',), 1))
(130, ((u'net',), 1))
(131, ((u'a',), 1))
(132, ((u'broader',), 1))
(133, ((u'intertropical',), 1))
(134, ((u'calakmul',), 1))
(135, ((u'central',), 1))
(136, ((u'associated',), 1))
(137, ((u'malaysia',), 1))
(138, ((u'amazon',), 1))
    =========== start data_2 ==============
(0, ((u'in',), 11))
(1, ((u'the',), 9))
(2, ((u'and',), 9))
(3, ((u'of',), 7))
(4, ((u'temperate',), 3))
(5, ((u'southern',), 3))
(6, ((u'as',), 3))
(7, ((u'coastal',), 3))
(8, ((u'rainforests',), 3))
(9, ((u'east',), 2))
(10, ((u'parts',), 2))
(11, ((u'america',), 2))
(12, ((u'areas',), 2))
(13, ((u'british',), 2))
(14, ((u'coast',), 2))
(15, ((u'occur',), 2))
(16, ((u'regions',), 2))
(17, ((u'are',), 1))
(18, ((u'turkey',), 1))
(19, ((u'they',), 1))
(20, ((u'on',), 1))
(21, ((u'australia',), 1))
(22, ((u'far',), 1))
(23, ((u'oregon',), 1))
(24, ((u'galicia',), 1))
(25, ((u'chile',), 1))
(26, ((u'island',), 1))
(27, ((u'few',), 1))
(28, ((u'zealand',), 1))
(29, ((u'columbia',), 1))
(30, ((u'but',), 1))
(31, ((u'world',), 1))
(32, ((u'sea',), 1))
(33, ((u'taiwan',), 1))
(34, ((u'northwest',), 1))
(35, ((u'europe',), 1))
(36, ((u'10',), 1))
(37, ((u'much',), 1))
(38, ((u'also',), 1))
(39, ((u'north',), 1))
(40, ((u'adriatic',), 1))
(41, ((u'such',), 1))
(42, ((u'cover',), 1))
(43, ((u'forests',), 1))
(44, ((u'part',), 1))
(45, ((u'including',), 1))
(46, ((u'western',), 1))
(47, ((u'a',), 1))
(48, ((u'norway',), 1))
(49, ((u'large',), 1))
(50, ((u'georgia',), 1))
(51, ((u'well',), 1))
(52, ((u'south',), 1))
(53, ((u'globe',), 1))
(54, ((u'tropical',), 1))
(55, ((u'adjacent',), 1))
(56, ((u'washington',), 1))
(57, ((u'only',), 1))
(58, ((u'russian',), 1))
(59, ((u'pacific',), 1))
(60, ((u'japan',), 1))
(61, ((u'black',), 1))
(62, ((u'along',), 1))
(63, ((u'highlands',), 1))
(64, ((u'ireland',), 1))
(65, ((u'sakhalin',), 1))
(66, ((u'balkans',), 1))
(67, ((u'korea',), 1))
(68, ((u'asia',), 1))
(69, ((u'around',), 1))
(70, ((u'scotland',), 1))
(71, ((u'eastern',), 1))
(72, ((u'alaska',), 1))
(73, ((u'china',), 1))
(74, ((u'isles',), 1))
(75, ((u'new',), 1))
(76, ((u'california',), 1))
我想确保我没有错过一个更简单的解决方案,也没有过度思考这个问题

对于上面的示例,要获得任意两个列表的匹配,我需要执行以下操作

0&1, 0&2, 0&3, 0&4, 0&5, 0&..N
对于任意三个列表的匹配:

0&1&2, 0&1&3, 0&1&4, 0&1&5, 0&1&..N
我之所以问这个问题,是因为看一个只有两个列表的示例,如果数组0和数组1不包含相似的单词,但是数组0和数组3包含相似的单词,该怎么办

有没有一种方法可以把这一点推广到一般情况?我强烈感觉这个问题早已有现成的解法,而我只是把它想得太复杂了。

我希望能够找出单词 cat 出现在 0,1,2,…N 这些列表中的哪些列表里。

[编辑] 以下是我正在处理的一些数据

data0 = unicode("Rainforests are forests characterized by high rainfall, with annual rainfall between 250 and 450 centimetres (98 and 177 in).[1] There are two types of rainforest: tropical rainforest and temperate rainforest. The monsoon trough, alternatively known as the intertropical convergence zone, plays a significant role in creating the climatic conditions necessary for the Earth's tropical rainforests. Around 40% to 75% of all biotic species are indigenous to the rainforests.[2] It has been estimated that there may be many millions of species of plants, insects and microorganisms still undiscovered in tropical rainforests. Tropical rainforests have been called the \"jewels of the Earth\" and the \"world's largest pharmacy\", because over one quarter of natural medicines have been discovered there.[3] Rainforests are also responsible for 28% of the world's oxygen turnover, sometimes misnamed oxygen production,[4] processing it through photosynthesis from carbon dioxide and consuming it through respiration. The undergrowth in some areas of a rainforest can be restricted by poor penetration of sunlight to ground level. If the leaf canopy is destroyed or thinned, the ground beneath is soon colonized by a dense, tangled growth of vines, shrubs and small trees, called a jungle. The term jungle is also sometimes applied to tropical rainforests generally.", "utf-8")

data1 = unicode("Tropical rainforests are characterized by a warm and wet climate with no substantial dry season: typically found within 10 degrees north and south of the equator. Mean monthly temperatures exceed 18 °C (64 °F) during all months of the year.[5] Average annual rainfall is no less than 168 cm (66 in) and can exceed 1,000 cm (390 in) although it typically lies between 175 cm (69 in) and 200 cm (79 in).[6] Many of the world's tropical forests are associated with the location of the monsoon trough, also known as the intertropical convergence zone.[7] The broader category of tropical moist forests are located in the equatorial zone between the Tropic of Cancer and Tropic of Capricorn. Tropical rainforests exist in Southeast Asia (from Myanmar (Burma) to the Philippines, Malaysia, Indonesia, Papua New Guinea, Sri Lanka, Sub-Saharan Africa from Cameroon to the Congo (Congo Rainforest), South America (e.g. the Amazon Rainforest), Central America (e.g. Bosawás, southern Yucatán Peninsula-El Peten-Belize-Calakmul), Australia, and on many of the Pacific Islands (such as Hawaiʻi). Tropical forests have been called the \"Earth's lungs\", although it is now known that rainforests contribute little net oxygen addition to the atmosphere through photosynthesis", "utf-8")

data2 = unicode("Tropical forests cover a large part of the globe, but temperate rainforests only occur in few regions around the world. Temperate rainforests are rainforests in temperate regions. They occur in North America (in the Pacific Northwest in Alaska, British Columbia, Washington, Oregon and California), in Europe (parts of the British Isles such as the coastal areas of Ireland and Scotland, southern Norway, parts of the western Balkans along the Adriatic coast, as well as in Galicia and coastal areas of the eastern Black Sea, including Georgia and coastal Turkey), in East Asia (in southern China, Highlands of Taiwan, much of Japan and Korea, and on Sakhalin Island and the adjacent Russian Far East coast), in South America (southern Chile) and also in Australia and New Zealand.[10]", "utf-8")
我清理文本,并将其分词(tokenize)为三个列表:data0_list … data2_list

之后,像这样的函数调用将输出数据

master_list.append(data_0)
master_list.append(data_1)
master_list.append(data_2)

for item in master_list:
    for index, item in enumerate(item):
        print(index, item)
该输出如下所示:

all = set(zer0).intersection(one).intersection(two).intersection(...N)
    =========== start data_0 ==============
(0, ((u'the',), 13))
(1, ((u'of',), 10))
(2, ((u'rainforests',), 7))
(3, ((u'and',), 7))
(4, ((u'tropical',), 5))
(5, ((u'to',), 4))
(6, ((u'rainforest',), 4))
(7, ((u'in',), 4))
(8, ((u'are',), 4))
(9, ((u'a',), 4))
(10, ((u'it',), 3))
(11, ((u'by',), 3))
(12, ((u'been',), 3))
(13, ((u's',), 3))
(14, ((u'is',), 3))
(15, ((u'there',), 3))
(16, ((u'have',), 2))
(17, ((u'earth',), 2))
(18, ((u'sometimes',), 2))
(19, ((u'also',), 2))
(20, ((u'oxygen',), 2))
(21, ((u'jungle',), 2))
(22, ((u'rainfall',), 2))
(23, ((u'for',), 2))
(24, ((u'through',), 2))
(25, ((u'called',), 2))
(26, ((u'be',), 2))
(27, ((u'world',), 2))
(28, ((u'species',), 2))
(29, ((u'ground',), 2))
(30, ((u'shrubs',), 1))
(31, ((u'may',), 1))
(32, ((u'biotic',), 1))
(33, ((u'from',), 1))
(34, ((u'respiration',), 1))
(35, ((u'known',), 1))
(36, ((u'largest',), 1))
(37, ((u'discovered',), 1))
(38, ((u'two',), 1))
(39, ((u'plants',), 1))
(40, ((u'conditions',), 1))
(41, ((u'insects',), 1))
(42, ((u'necessary',), 1))
(43, ((u'1',), 1))
(44, ((u'convergence',), 1))
(45, ((u'jewels',), 1))
(46, ((u'poor',), 1))
(47, ((u'estimated',), 1))
(48, ((u'if',), 1))
(49, ((u'creating',), 1))
(50, ((u'that',), 1))
(51, ((u'75',), 1))
(52, ((u'growth',), 1))
(53, ((u'penetration',), 1))
(54, ((u'thinned',), 1))
(55, ((u'has',), 1))
(56, ((u'characterized',), 1))
(57, ((u'plays',), 1))
(58, ((u'temperate',), 1))
(59, ((u'production',), 1))
(60, ((u'because',), 1))
(61, ((u'high',), 1))
(62, ((u'98',), 1))
(63, ((u'trough',), 1))
(64, ((u'centimetres',), 1))
(65, ((u'over',), 1))
(66, ((u'some',), 1))
(67, ((u'undiscovered',), 1))
(68, ((u'natural',), 1))
(69, ((u'still',), 1))
(70, ((u'misnamed',), 1))
(71, ((u'all',), 1))
(72, ((u'many',), 1))
(73, ((u'sunlight',), 1))
(74, ((u'millions',), 1))
(75, ((u'dioxide',), 1))
(76, ((u'around',), 1))
(77, ((u'28',), 1))
(78, ((u'monsoon',), 1))
(79, ((u'canopy',), 1))
(80, ((u'photosynthesis',), 1))
(81, ((u'level',), 1))
(82, ((u'177',), 1))
(83, ((u'trees',), 1))
(84, ((u'carbon',), 1))
(85, ((u'one',), 1))
(86, ((u'4',), 1))
(87, ((u'between',), 1))
(88, ((u'areas',), 1))
(89, ((u'responsible',), 1))
(90, ((u'as',), 1))
(91, ((u'vines',), 1))
(92, ((u'450',), 1))
(93, ((u'turnover',), 1))
(94, ((u'leaf',), 1))
(95, ((u'role',), 1))
(96, ((u'indigenous',), 1))
(97, ((u'can',), 1))
(98, ((u'with',), 1))
(99, ((u'types',), 1))
(100, ((u'alternatively',), 1))
(101, ((u'annual',), 1))
(102, ((u'generally',), 1))
(103, ((u'zone',), 1))
(104, ((u'beneath',), 1))
(105, ((u'significant',), 1))
(106, ((u'consuming',), 1))
(107, ((u'microorganisms',), 1))
(108, ((u'applied',), 1))
(109, ((u'soon',), 1))
(110, ((u'2',), 1))
(111, ((u'tangled',), 1))
(112, ((u'250',), 1))
(113, ((u'restricted',), 1))
(114, ((u'undergrowth',), 1))
(115, ((u'medicines',), 1))
(116, ((u'climatic',), 1))
(117, ((u'colonized',), 1))
(118, ((u'forests',), 1))
(119, ((u'dense',), 1))
(120, ((u'pharmacy',), 1))
(121, ((u'quarter',), 1))
(122, ((u'intertropical',), 1))
(123, ((u'term',), 1))
(124, ((u'or',), 1))
(125, ((u'destroyed',), 1))
(126, ((u'processing',), 1))
(127, ((u'3',), 1))
(128, ((u'small',), 1))
(129, ((u'40',), 1))
    =========== start data_1 ==============
(0, ((u'the',), 15))
(1, ((u'of',), 8))
(2, ((u'in',), 6))
(3, ((u'and',), 6))
(4, ((u'tropical',), 5))
(5, ((u'cm',), 4))
(6, ((u'to',), 3))
(7, ((u'are',), 3))
(8, ((u'rainforests',), 3))
(9, ((u'forests',), 3))
(10, ((u'south',), 2))
(11, ((u'from',), 2))
(12, ((u'it',), 2))
(13, ((u'g',), 2))
(14, ((u'no',), 2))
(15, ((u'known',), 2))
(16, ((u'rainforest',), 2))
(17, ((u'exceed',), 2))
(18, ((u'although',), 2))
(19, ((u'typically',), 2))
(20, ((u'america',), 2))
(21, ((u'e',), 2))
(22, ((u'many',), 2))
(23, ((u's',), 2))
(24, ((u'between',), 2))
(25, ((u'as',), 2))
(26, ((u'is',), 2))
(27, ((u'with',), 2))
(28, ((u'zone',), 2))
(29, ((u'congo',), 2))
(30, ((u'tropic',), 2))
(31, ((u'equatorial',), 1))
(32, ((u'within',), 1))
(33, ((u'located',), 1))
(34, ((u'convergence',), 1))
(35, ((u'now',), 1))
(36, ((u'el',), 1))
(37, ((u'by',), 1))
(38, ((u'saharan',), 1))
(39, ((u'average',), 1))
(40, ((u'lungs',), 1))
(41, ((u'less',), 1))
(42, ((u'64',), 1))
(43, ((u'have',), 1))
(44, ((u'degreef',), 1))
(45, ((u'temperatures',), 1))
(46, ((u'1',), 1))
(47, ((u'africa',), 1))
(48, ((u'earth',), 1))
(49, ((u'200',), 1))
(50, ((u'australia',), 1))
(51, ((u'18',), 1))
(52, ((u'peninsula',), 1))
(53, ((u'indonesia',), 1))
(54, ((u'that',), 1))
(55, ((u'390',), 1))
(56, ((u'been',), 1))
(57, ((u'10',), 1))
(58, ((u'characterized',), 1))
(59, ((u'also',), 1))
(60, ((u'yucatan',), 1))
(61, ((u'6',), 1))
(62, ((u'such',), 1))
(63, ((u'months',), 1))
(64, ((u'000',), 1))
(65, ((u'islands',), 1))
(66, ((u'trough',), 1))
(67, ((u'dry',), 1))
(68, ((u'66',), 1))
(69, ((u'equator',), 1))
(70, ((u'season',), 1))
(71, ((u'mean',), 1))
(72, ((u'sub',), 1))
(73, ((u'oxygen',), 1))
(74, ((u'degrees',), 1))
(75, ((u'7',), 1))
(76, ((u'rainfall',), 1))
(77, ((u'lanka',), 1))
(78, ((u'all',), 1))
(79, ((u'monthly',), 1))
(80, ((u'cancer',), 1))
(81, ((u'monsoon',), 1))
(82, ((u'asia',), 1))
(83, ((u'on',), 1))
(84, ((u'photosynthesis',), 1))
(85, ((u'degreec',), 1))
(86, ((u'southern',), 1))
(87, ((u'location',), 1))
(88, ((u'addition',), 1))
(89, ((u'sri',), 1))
(90, ((u'capricorn',), 1))
(91, ((u'southeast',), 1))
(92, ((u'warm',), 1))
(93, ((u'found',), 1))
(94, ((u'through',), 1))
(95, ((u'cameroon',), 1))
(96, ((u'climate',), 1))
(97, ((u'called',), 1))
(98, ((u'bosawas',), 1))
(99, ((u'pacific',), 1))
(100, ((u'69',), 1))
(101, ((u'5',), 1))
(102, ((u'can',), 1))
(103, ((u'burma',), 1))
(104, ((u'79',), 1))
(105, ((u'papua',), 1))
(106, ((u'annual',), 1))
(107, ((u'lies',), 1))
(108, ((u'atmosphere',), 1))
(109, ((u'substantial',), 1))
(110, ((u'new',), 1))
(111, ((u'168',), 1))
(112, ((u'category',), 1))
(113, ((u'moist',), 1))
(114, ((u'year',), 1))
(115, ((u'little',), 1))
(116, ((u'contribute',), 1))
(117, ((u'during',), 1))
(118, ((u'175',), 1))
(119, ((u'belize',), 1))
(120, ((u'wet',), 1))
(121, ((u'than',), 1))
(122, ((u'guinea',), 1))
(123, ((u'north',), 1))
(124, ((u'philippines',), 1))
(125, ((u'hawai\u02bbi',), 1))
(126, ((u'myanmar',), 1))
(127, ((u'world',), 1))
(128, ((u'peten',), 1))
(129, ((u'exist',), 1))
(130, ((u'net',), 1))
(131, ((u'a',), 1))
(132, ((u'broader',), 1))
(133, ((u'intertropical',), 1))
(134, ((u'calakmul',), 1))
(135, ((u'central',), 1))
(136, ((u'associated',), 1))
(137, ((u'malaysia',), 1))
(138, ((u'amazon',), 1))
    =========== start data_2 ==============
(0, ((u'in',), 11))
(1, ((u'the',), 9))
(2, ((u'and',), 9))
(3, ((u'of',), 7))
(4, ((u'temperate',), 3))
(5, ((u'southern',), 3))
(6, ((u'as',), 3))
(7, ((u'coastal',), 3))
(8, ((u'rainforests',), 3))
(9, ((u'east',), 2))
(10, ((u'parts',), 2))
(11, ((u'america',), 2))
(12, ((u'areas',), 2))
(13, ((u'british',), 2))
(14, ((u'coast',), 2))
(15, ((u'occur',), 2))
(16, ((u'regions',), 2))
(17, ((u'are',), 1))
(18, ((u'turkey',), 1))
(19, ((u'they',), 1))
(20, ((u'on',), 1))
(21, ((u'australia',), 1))
(22, ((u'far',), 1))
(23, ((u'oregon',), 1))
(24, ((u'galicia',), 1))
(25, ((u'chile',), 1))
(26, ((u'island',), 1))
(27, ((u'few',), 1))
(28, ((u'zealand',), 1))
(29, ((u'columbia',), 1))
(30, ((u'but',), 1))
(31, ((u'world',), 1))
(32, ((u'sea',), 1))
(33, ((u'taiwan',), 1))
(34, ((u'northwest',), 1))
(35, ((u'europe',), 1))
(36, ((u'10',), 1))
(37, ((u'much',), 1))
(38, ((u'also',), 1))
(39, ((u'north',), 1))
(40, ((u'adriatic',), 1))
(41, ((u'such',), 1))
(42, ((u'cover',), 1))
(43, ((u'forests',), 1))
(44, ((u'part',), 1))
(45, ((u'including',), 1))
(46, ((u'western',), 1))
(47, ((u'a',), 1))
(48, ((u'norway',), 1))
(49, ((u'large',), 1))
(50, ((u'georgia',), 1))
(51, ((u'well',), 1))
(52, ((u'south',), 1))
(53, ((u'globe',), 1))
(54, ((u'tropical',), 1))
(55, ((u'adjacent',), 1))
(56, ((u'washington',), 1))
(57, ((u'only',), 1))
(58, ((u'russian',), 1))
(59, ((u'pacific',), 1))
(60, ((u'japan',), 1))
(61, ((u'black',), 1))
(62, ((u'along',), 1))
(63, ((u'highlands',), 1))
(64, ((u'ireland',), 1))
(65, ((u'sakhalin',), 1))
(66, ((u'balkans',), 1))
(67, ((u'korea',), 1))
(68, ((u'asia',), 1))
(69, ((u'around',), 1))
(70, ((u'scotland',), 1))
(71, ((u'eastern',), 1))
(72, ((u'alaska',), 1))
(73, ((u'china',), 1))
(74, ((u'isles',), 1))
(75, ((u'new',), 1))
(76, ((u'california',), 1))
在本例中,单词 rainforests、world、forests 以及其他一些更常见的单词在这三个数据集中都出现了

我现在要做的是找到不止一个列表中的单词

例如,我想得出:单词 rainforests 出现在 3 个列表中的 3 个里(3/3)

另一方面,单词 oxygen 出现在 3 个列表中的 2 个里(2/3),即 data_0 和 data_1。

您可以使用(如果您使用的是Python 3.x)


查找多个列表交集的最简单方法是使用列表切片功能以及
set.intersection()
。例如:

my_list =[
    ['cat', 'dog', 'fan'],
    ['cat', 'dog', 'pine'],
    ['cat', 'light', 'tree', 'dog'],
    ['dog', 'pine', 'cat', 'tree'],
    ['fan', 'pine', 'dog', 'tree', 'cat'],
    ['light', 'dog', 'pine', 'cat', 'tree']]
然后,所有列表的交集可以计算为:

#                              v  Unwrapped list from index '1'
set(my_list[0]).intersection(*my_list[1:])
#           ^ First element in list 
将返回:

set(['dog', 'cat'])

编辑:看起来您需要的并不是交集。根据以下这句话,您需要的是统计每个项目在所有列表中出现的次数:

我希望能够找出单词cat出现在0,1,2,…N列表中

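如果您只关心项目的计数,则可以将 collections.Counter 与 itertools.chain 一起使用:

from collections import Counter
from itertools import chain

my_count = Counter(chain(*my_list))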

其中
my_count
将保持:

{'dog': 6, 
 'cat': 6, 
 'tree': 4, 
 'pine': 4, 
 'light': 2, 
 'fan': 2}

如果还希望项目与其列表的映射,则可以创建
dict
来映射项目。但是,首先您需要将所有项目合并为:

all_items = set(my_list[0]).union(*my_list[1:])
# which will hold: set(['light', 'tree', 'dog', 'pine', 'cat', 'fan'])
然后将其存储在
dict
中。我使用的是:

from collections import defaultdict
my_dict = defaultdict(list)

for item in all_items:
    for sub_list in my_list:
        my_dict[item].append(item in sub_list)
现在
my_dict
将保存值:

{
     'light': [False, False, True, False, False, True], 
     #          ^              ^ Present in list 3
     #          ^  Not present in list 1
     'tree': [False, False, True, True, True, True], 
     'dog': [True, True, True, True, True, True], 
     'pine': [False, True, False, True, True, True], 
     'cat': [True, True, True, True, True, True], 
     'fan': [True, False, False, False, True, False]
}
您可以从该字典中得到每个项目的出现次数。

如果您可以使用集合(set)而不是列表,下面是一种通用的做法:

要创建所有组合,我们使用 itertools.combinations:

我们将使用可散列的
frozenset
(稍后用作
dict
键):

通过对所有组合应用 frozenset.intersection 来创建映射(例如,选取大小为 3 的组合),并将结果存储在 dict 中:

intersections = {frozenset(k): frozenset.intersection(*k) for k in combinations(sets, 3)}
结果:

{frozenset({frozenset({2, 3, 4}), frozenset({3, 4, 5}), frozenset({4, 5, 6})}): frozenset({4}), frozenset({frozenset({1, 2, 3}), frozenset({2, 3, 4}), frozenset({4, 5, 6})}): frozenset(), frozenset({frozenset({1, 2, 3}), frozenset({3, 4, 5}), frozenset({4, 5, 6})}): frozenset(), frozenset({frozenset({1, 2, 3}), frozenset({2, 3, 4}), frozenset({3, 4, 5})}): frozenset({3})}

尽管您的问题大部分都是关于集合的交集,但您实际想要的似乎与该概念没有直接关系:

我希望能够找出单词cat出现在0,1,2,…N列表中

您可以在不涉及交集、集合等概念的情况下做到这一点:

one = ['cat', 'dog', 'pine']
two = ['cat', 'fan', 'pine']
three = ['cat', 'pine', 'tree']
four = ['dog', 'pine', 'tree']
five = ['fan', 'pine', 'tree']
six = ['light', 'pine', 'tree']

这是因为 True 在算术运算中的作用类似于整数 1(而 sum() 正是基于算术运算的)。

如果你真正想要的是所有“集合”的交集,那也很简单:

>>> set.intersection(*(set(s) for s in (one, two, three, four, five, six)))
{'pine'}
更新:现在您已经澄清了问题,很明显,您实际上需要统计一个单词在各个列表中出现的次数。除了上面描述的统计单个单词出现次数的方法之外,正如我在评论中提到的(Moinuddin Quadri 后来也将其加入了他的回答),Python 中实现这一点的惯用方法是使用 collections.Counter 和 itertools.chain:

精炼:

one = ['cat', 'dog', 'fan']
two = ['cat', 'dog', 'pine']
three = ['cat', 'light', 'tree']
four = ['dog', 'pine', 'tree']
five = ['fan', 'pine', 'tree']
six = ['light', 'pine', 'tree']
lists = one + two + three + four + five + six
[(e, lists.count(e)) for e in set(lists)]
# => [('light', 2), ('tree', 4), ('dog', 3), ('pine', 4), ('cat', 3), ('fan', 2)]

这也可以表示为
collections.Counter(itertools.chain(one, two, three, four, five, six))
这绝对是我需要复习的模块。谢谢!
正如你所看到的,由于对你的问题有不同的理解,你收到了许多不同的答案。你能澄清一下你真正需要的是什么吗?
我添加了一些澄清,以及一些带有测试数据的示例代码。
在这种情况下,你要找的是各个列表中项目的计数,而不是交集;下面至少有一个答案应该能帮你解决问题。
我喜欢你的代码生成的 my_dict 映射,你能看看我做的编辑吗?我补充了一些澄清,说明我在哪一部分遇到了更多的困难。
one = ['cat', 'dog', 'pine']
two = ['cat', 'fan', 'pine']
three = ['cat', 'pine', 'tree']
four = ['dog', 'pine', 'tree']
five = ['fan', 'pine', 'tree']
six = ['light', 'pine', 'tree']
>>> sum(True for s in (one, two, three, four, five, six) if 'cat' in s)
3
>>> sum(True for s in (one, two, three, four, five, six) if 'tree' in s)
4
>>> set.intersection(*(set(s) for s in (one, two, three, four, five, six)))
{'pine'}
>>> from collections import Counter
>>> from itertools import chain
>>> counts = Counter(chain(one, two, three, four, five, six))
>>> counts
Counter({'pine': 6, 'tree': 4, 'cat': 3, 'dog': 2, 'fan': 2, 'light': 1})
>>> counts['cat']
3