Algorithm 查找字符串不同的第一个索引的算法？_Algorithm_Arrays_Sorting_Indexing_Unique

Algorithm 查找字符串不同的第一个索引的算法？

algorithm arrays sorting indexing

Algorithm 查找字符串不同的第一个索引的算法？,algorithm,arrays,sorting,indexing,unique,Algorithm,Arrays,Sorting,Indexing,Unique,我有一个字符串集合，我需要知道第一个索引，它们的不同之处。我可以想出两种方法来做到这一点：（下面的伪代码是我脑子里想不出来的，可能有很多bug）第一种方式： var minLength = [go through all strings finding min length]; var set = new set() for(i=0;i<minlength;i++) { for(str in strings) { var substring = str.substring

我有一个字符串集合，需要找出这些字符串开始互不相同的第一个索引。我能想到两种做法：（下面的伪代码是我随手写的，可能有很多bug）

第一种方式：

// Pseudocode sketch: for each candidate length i, collect the prefix of every
// string in a set and stop collecting as soon as a prefix repeats.
// Placeholder: the minimum string length is meant to be computed here.
var minLength = [go through all strings finding min length];
// Holds the prefixes seen so far for the current value of i.
var set = new set()
// NOTE(review): the bound is spelled `minlength`, but the variable declared
// above is `minLength` - confirm whether the language is case-sensitive.
for(i=0;i<minlength;i++)
{
  for(str in strings)
  {
    // NOTE(review): presumably substring(0,i) excludes position i, so the
    // first pass (i=0) would compare empty prefixes - TODO confirm.
    var substring = str.substring(0,i);
    // The break below only leaves the inner loop; the outer loop then clears
    // the set and moves on to the next i.
    if(set.contains(substring))
      break; // not all different yet, increment i
    set.add(substring)
  }
  // NOTE(review): nothing here records or returns i when the inner loop
  // finishes without hitting the break, so no result is ever produced.
  set.clear(); // prepare for next length of substring
}

var minLength=[遍历所有字符串查找最小长度]；
变量集=新集（）
for(i=0;i<minlength;i++) ……

使用您建议的集合，这正是正确的做法。
inti=0；
// Walk the strings one character position ("column") at a time and return
// the first column at which every string carries a different character.
int column = 0;
for(;; column++)
{
    // Characters already seen in this column.
    Set seen = new Set();
    int row = 0;
    while(row < strings.length)
    {
        // One of the strings has no character at this column: report the
        // column reached so far.
        if(strings[row].length <= column)
            return column;

        String current = strings[row].charAt(column);
        if(!seen.hasElement(current))
        {
            seen.addElement(current);
            row++;
            continue;
        }
        // Duplicate character in this column - stop scanning it.
        break;
    }

    // Every string contributed a distinct character, so this is the answer.
    if(strings.length == seen.size())
        return column;
}

while（true）
{
集合=新集合（）；
对于（int j=0；j=strings[j].length）返回i；
字符串chr=strings[j].charAt（i）；
if（集合元素（chr））
打破
其他的
附加元件（chr）；
}
if（set.size（）==strings.length）
返回i；
i++；
}

首先要检查前提条件
编辑：现在使用集合。更改语言。
这段代码未经测试，只是我的一个尝试。（我可能把它写得比实际需要的更复杂，但我认为这提供了一种不同的思路。）
基本思想是：先把在当前索引处元素相同的项归为一组，然后对每个组在后续索引处继续逐一检查元素，最后取各组结果中最大的唯一索引。
/// <summary>
/// Convenience overload that starts the search at index 0.
/// </summary>
/// <param name="myArrayCollection">The sequences to compare.</param>
/// <returns>Whatever the two-argument overload returns for a start index of 0.</returns>
int FirstUniqueIndex<T>(IEnumerable<IEnumerable<T>> myArrayCollection)
{
    // Callers that do not care about the start position always begin here.
    const int firstIndex = 0;
    return FirstUniqueIndex(myArrayCollection, firstIndex);
}

/// <summary>
/// Finds the first index, at or after <paramref name="StartIndex"/>, at which
/// no two sequences that agree on every earlier index share the same element.
/// </summary>
/// <param name="myArrayCollection">The sequences to compare.</param>
/// <param name="StartIndex">The index at which to start comparing.</param>
/// <returns>
/// <paramref name="StartIndex"/> when no two sequences share an element there;
/// otherwise the largest result obtained by recursing into each group of
/// sequences that do share an element at <paramref name="StartIndex"/>.
/// </returns>
int FirstUniqueIndex<T>(IEnumerable<IEnumerable<T>> myArrayCollection, int StartIndex)
{
    /* Group the sequences by the element at StartIndex and keep only the
     * groups holding more than one sequence. IEnumerable<T> has neither a
     * Length nor an indexer, so Skip/Any and ElementAt are used instead.
     * ToList() materializes the query once; it is consulted twice below. */
    var groupsWithMatches = (from item in myArrayCollection
                             where item.Skip(StartIndex).Any() //long enough to have an element at StartIndex
                             group item by item.ElementAt(StartIndex) into g
                             where g.Skip(1).Any() //only groups with more than one sequence
                             select g).ToList();

    if (groupsWithMatches.Count > 0)
    {
        //some matches were found - check the next index for each group
        //(take the maximum unique index over all the matched groups)
        return groupsWithMatches.Max(matchGroup => FirstUniqueIndex<T>(matchGroup, StartIndex + 1));
    }
    else
    {
        //no matches found, all unique at this index
        return StartIndex;
    }
}

int FirstUniqueIndex（IEnumerable myArrayCollection）
{
//只是一个重载，所以您不必一直指定索引0
返回FirstUniqueIndex（myArrayCollection，0）；
}
int FirstUniqueIndex（IEnumerable myArrayCollection，int StartIndex）
{
/*按StartIndex处的元素对当前集合进行分组，然后
*返回这些组的集合。此外，我们只感兴趣
*在包含多个元素的组中，因此仅获取这些元素*/
var groupsWithMatches=来自myArrayCollection//中的var项，用于集合中的每个项（称为“项”）
其中item.Length>StartIndex//足够长
按项[StartIndex]分组到g//按StartIndex处的元素将它们分组，并调用组“g”
其中g.Skip（1）.Any（）//只需要包含多个元素的组
选择g；//将组添加到集合中
/*现在，“groupsWithMatches”是一组内部匹配的枚举
*您的原始阵列。让我们处理它们*/
if（groupsWithMatches.Any（））
//找到了一些匹配项-检查每组的下一个索引
//（获取所有匹配组的最大唯一索引）
返回groupsWithMatches.Max（group=>FirstUniqueIndex（group，StartIndex+1））；
其他的
//未找到匹配项，此索引中的所有项都是唯一的
返回StartIndex；
}


下面是上述代码的非LINQ版本（我把它改成了使用List集合，但任何集合都可以），连lambda也去掉了。同样未经测试，所以请手下留情。
/// <summary>
/// Finds the first index, at or after <paramref name="StartIndex"/>, at which
/// no two lists that agree on every earlier index share the same element.
/// </summary>
/// <param name="myArrayCollection">The lists to compare.</param>
/// <param name="StartIndex">The index at which to start comparing.</param>
/// <returns>
/// <paramref name="StartIndex"/> when no two lists share an element there;
/// otherwise the largest result obtained by recursing into each group of
/// lists that do share an element at <paramref name="StartIndex"/>.
/// </returns>
int FirstUniqueIndex<T>(List<List<T>> myArrayCollection, int StartIndex)
{
    /* Group the current collection by the element at StartIndex. Lists that
     * are too short to have an element there are left out. */
    Dictionary<T, List<List<T>>> groups = new Dictionary<T, List<List<T>>>();

    foreach (var item in myArrayCollection)
    {
        if (item.Count > StartIndex)
        {
            List<List<T>> bucket;
            if (!groups.TryGetValue(item[StartIndex], out bucket))
            {
                //new group, so make it first
                bucket = new List<List<T>>();
                groups.Add(item[StartIndex], bucket);
            }

            bucket.Add(item);
        }
    }

    /* Only groups with more than one list still contain a clash. Each of
     * them is checked at the next index and the largest result wins. Every
     * recursive result is at least StartIndex + 1, so starting from
     * StartIndex also covers the "no clashes at all" case. */
    int max = StartIndex;
    foreach (List<List<T>> bucket in groups.Values)
    {
        if (bucket.Count > 1)
        {
            int index = FirstUniqueIndex(bucket, StartIndex + 1);
            max = index > max ? index : max;
        }
    }

    return max;
}

int FirstUniqueIndex（列出myArrayCollection，int StartIndex）
{
/*按StartIndex处的元素对当前集合进行分组，然后
*返回这些组的集合。此外，我们只感兴趣
*在包含多个元素的组中，因此仅获取这些元素*/
Dictionary groupsWithMatches=新字典（）；
//按StartIndex中的元素对所有项目进行分组
foreach（myArrayCollection中的var项）
{
如果（item.Count>StartIndex）
{
列表组；
if（！groups.TryGetValue（项[StartIndex]，组外））
{
//新组，所以请先创建它
组=新列表（）；
添加（项目[StartIndex]，组）；
}
组。添加（项目）；
}
}
/*现在“groups”是一个内部匹配组的枚举
*您的原始数组。让我们获取包含多个项的组*/
列表组匹配=新列表（groups.Count）；
foreach（列表1）
组匹配。添加（组）；
}
如果（groupsWithMatches.Count>0）
{
//找到了一些匹配项-检查每组的下一个索引
//（获取所有匹配组的最大唯一索引）
int max=-1；
foreach（组匹配中的列表组）
{
int-index=FirstUniqueIndex（组，StartIndex+1）；
最大=索引>最大？索引：最大；
}
返回最大值；
}
其他的
{
//未找到匹配项，此索引中的所有项都是唯一的
返回StartIndex；
}
}
您应该能够做到这一点，而无需排序，并且在最坏的情况下只需查看每个字符串中的每个字符一次
下面是一个把该索引输出到控制台的Ruby脚本：
# Prints the first character index at which every string in `mystrings`
# carries a different character. If no such index exists within the shortest
# string, the length of the shortest string is printed instead.
mystrings = ["apple", "banana", "cucumber", "banking"]

# Length of the shortest string; 0 when the list is empty.
minlength = mystrings.map(&:length).min || 0

# Exclusive range: index `minlength` itself is already past the end of the
# shortest string.
first_unique = (0...minlength).find do |char_index|
  column = mystrings.map { |string| string[char_index] }
  # All characters in this column differ when removing duplicates drops none.
  column.uniq.length == column.length
end

puts(first_unique || minlength)

结果是3
如果这样对您来说更清楚，下面是用C#写的大致相同的代码片段：
// Prints the first character index at which every string in mystrings
// carries a different character. If there is no such index within the
// shortest string, the minimum length is printed instead.
string[] mystrings = { "apple", "banana", "cucumber", "banking" };

//defined elsewhere...
int minlength = GetMinStringLengthFromStringArray(mystrings);

// Characters already seen at the current index.
HashSet<char> charSet = new HashSet<char>();

for (int char_index = 0; char_index < minlength; char_index++)
{
    bool allDistinct = true;

    for (int string_index = 0; string_index < mystrings.Length; string_index++)
    {
        // Add returns false when the character is already in the set,
        // i.e. two strings share it at this index.
        if (!charSet.Add(mystrings[string_index][char_index]))
        {
            allDistinct = false;
            break;
        }
    }

    if (allDistinct)
    {
        // Report the character index, not the index of the last string.
        Console.Out.WriteLine("Index is: " + char_index.ToString());
        return;
    }

    charSet.Clear();
}
Console.Out.WriteLine("Index is: " + minlength.ToString());

string[]mystrings={“苹果”、“香蕉”、“黄瓜”、“银行”}；
//在别处定义。。。
int minlength=GetMinStringLengthFromStringArray（myString）；
字典字符集=新字典（）；
对于（int char_index=0；char_index
以下是我的Python解决方案：
words = ["apple", "banana", "cucumber", "banking"]


def first_unique_index(words):
    """Return the first index at which all words have different characters.

    Only indices that exist in every word are examined, i.e. the search stops
    at the length of the shortest word.

    Args:
        words: a sequence of strings.

    Returns:
        The first such index, or None when there is no such index within the
        shortest word (including when ``words`` is empty).
    """
    # Shortest *length*; min(words) alone would pick the lexicographically
    # smallest word, which is not necessarily the shortest one.
    shortest = min((len(word) for word in words), default=0)
    for i in range(shortest):
        # All characters at position i differ when the set keeps one per word.
        if len({word[i] for word in words}) == len(words):
            return i
    return None


first_index = first_unique_index(words)

我没有写任何东西来处理当你到达最短单词的末尾时没有找到最小索引的情况，但我相信你明白了
words = ["apple", "banana", "cucumber", "banking"]


def first_unique_index(words):
    """Return the first index at which all words have different characters.

    Only indices that exist in every word are examined, i.e. the search stops
    at the length of the shortest word.

    Args:
        words: a sequence of strings.

    Returns:
        The first such index, or None when there is no such index within the
        shortest word (including when ``words`` is empty).
    """
    # Shortest *length*; min(words) alone would pick the lexicographically
    # smallest word, which is not necessarily the shortest one.
    shortest = min((len(word) for word in words), default=0)
    for i in range(shortest):
        # All characters at position i differ when the set keeps one per word.
        if len({word[i] for word in words}) == len(words):
            return i
    return None


first_index = first_unique_index(words)