Design an algorithm and write code to remove the duplicate characters in a string without using any additional buffer. NOTE: One or two additional variables are fine. An extra copy of the array is not.
Write the test cases for this method.
去除掉一个字符数组中的重复字符,不能使用附加的缓存空间,并设计测试用例

本文中的C++算法来自于HawStein的博客。

题目中已经要求不可以有额外的buffer,所以得在原数组的基础上操作。
此外,假设所有字符char的取值大小只有[0,256]
算法思想:遍历整个buffer,如果有一个字符,将这个字符写入到s[p]的位置上去(p必然小于等于i),并且遍历后面,和它相同的字符全部标为’\0’。

#include<cstring>
void RemoveDuplicate(char s[]){
    int len = strlen(s);
    int p = 0;
    for (int i = 0; i < len; i++)
    {
        if (s[i]!='\0')
        {
            s[p++] = s[i];
            for (int j = i + 1; j < len;j++)
                if (s[i] == s[j])
                    s[j] = '\0';
        }
    }
    s[p] = '\0';
}

这个方法的时间复杂度为O(n^2)

如果可以开一个固定大小的数组(即大小与n无关),那么时间复杂度可以到O(n)
算法思想:开设一个bool数组来记录字符的状态

void RemoveDuplicate1(char s[]){
    int len = strlen(s);
    bool c[256];
    int p = 0;
    memset(c, 0, sizeof(c));
    for (int i = 0; i < len; i++){
        if (!c[s[i]]){
            c[s[i]] = true;
            s[p++] = s[i];
        }
    }
    s[p] = '\0';
}

性能测试

基础测试用例

#include<stdio.h>
#include<stdlib.h>
#include<cstring>
void RemoveDuplicate(char s[]);
void RemoveDuplicate1(char s[]);
void RemoveDuplicate2(char s[]);
int main(){

    char s1[] = "starcraft";
    char s2[] = "llllllll";
    char s3[] = "";
    char s4[] = "ssttaarrccrraafftt";
    printf("====RemoveDuplicate====\n");
    RemoveDuplicate(s1);printf("%s\n",s1);
    RemoveDuplicate(s2);printf("%s\n",s2);


    printf("====RemoveDuplicate1====\n");   
    RemoveDuplicate1(s3); printf("%s\n", s3);
    RemoveDuplicate1(s4); printf("%s\n", s4);





    system("pause");
    return 0;
}

====RemoveDuplicate====
starcf
l
====RemoveDuplicate1====

starcf

请按任意键继续. . .

性能比较
为了获得足够大的字符数组,我修改了一下之前的测试用例生成函数,使其可以生成任意大小的字符数组。

#include <time.h>
#include <stdio.h>
#include <random>
#include <stdlib.h>
#include <math.h>
#define random(x) (rand()%x)
char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/******生成测试数据data,并且将生成的数组写入到文件中保存***********/
//*****type为'c'时指定生成一个随机字符数组

void* get_test_data (const char* file_name, int size,char type,int maxnum){
    void* data;
    if (type == 'c')
       data = (char*)malloc(sizeof(char)*size);
    else
       data = (int*)malloc(sizeof(int)*size);
    FILE* output_file;
    fopen_s(&output_file,file_name, "w+");
    if (NULL == output_file){
        return NULL;
    }
    srand((int)time(0));
    for (int i = 0; i < size; i++){
        if (type == 'c'){
            char* temp = (char*)data;
            int ram = random(sizeof(charset)-2);
            if (ram < 0)
                ram *= -1;
            temp[i] = charset[ram];
            if (i == size - 1)
                temp[i] = '\0';
            fprintf(output_file, "%c", temp[i]);
        }
        else if (type == 'd')
        {
            ((int*)data)[i] = random(maxnum);

            fprintf(output_file, "%d", ((int*)data)[i]);
        }
    }

    return data;
}

测试用例

#include<stdio.h>
#include<stdlib.h>
#include<cstring>
#define SIZE 1000
void RemoveDuplicate(char s[]);
void RemoveDuplicate1(char s[]);
void RemoveDuplicate2(char s[]);
void* get_test_data(const char* file_name, int size,char type,int maxnum);

int main(){
    /*printf("====RemoveDuplicate2====\n");
    RemoveDuplicate2(s1); printf("%s\n", s1);
    RemoveDuplicate2(s2); printf("%s\n", s2);
    RemoveDuplicate2(s3); printf("%s\n", s3);
    RemoveDuplicate2(s4); printf("%s\n", s4);*/

    char *bc=(char*)get_test_data("test.txt",SIZE,'c',0);
    char  bc_copy[SIZE];
    memcpy_s(bc_copy,SIZE,bc,SIZE);
    printf("%s\n=========\n", bc);
    printf("%s\n=========\n", bc_copy);
    RemoveDuplicate(bc); printf("%s\n", bc);
    RemoveDuplicate1(bc_copy); printf("%s\n", bc_copy);
    system("pause");
    return 0;
}

结果

slbkoXzycbXuWseuyrwyCjVusUcTggHHFdkrBMYIoHNTKWMEBajIBATVQSPDNclUEfgobDylLRVzVuVR
cajBbDKSGuLpbiGQEccsQfJNsUFGncOeMNujfDxUxijXITEnMOSHrXcxvzInvmGNUVNIvFLBomNxcDTq
ovkKsIQhivebpLMYSPFqLqTVkVhMWmorQabHwiMfAkqScUgUfVGmQWTRjVLnAaHiOqzCEbgtizvHoIkj
CNBnYKhbcVmyqOrKvCLORwhMnOskqVTCUzfaUzlJBnLpnKDlwoqxTthnzPSgcLpJWYstpAtRpDLLbFck
PJsJHodnUWWABoYyEYVjiXXziRCJbEUcpwQRFKWEFDrjbvUwVoDStIbIcLUwIRAyMrpRuywOjMABUGtY
csemrzEHPhTclDMoeaVOIzIMWycsvsArRPDoIXAwoblPSttHOfdflOKlQilTnGKMyySsXAcEObKGRTfg
CvMyLHwIVAVUtJdJfHTUApBHsMrLkOgNmXuOviCgTQdQGTVfopaPgxJjbqkSzghUeUOvTgMpJsxOuJHr
sUFNHzjLdgXRVNybtIaurnNMlfILQsERqycXbxuuWbIHWrgAODVsRgGVcBboVyesbuBrKyAcRKSsMFmj
ajgAnwjSzvBUbubjRwOsXcLEiJjMBUVHmlqQhVpCKhNKwtLFsmBcvNqnuGoUibEYVFncFqgSeXniHlxH
yBgJmueKrfSrbnNnqxhmcrfOCNbbgnXsuqthGQuIilFWAdsUgSecEfMtbzFSfwjOylxVTkqdVWclsEmb
GpGJSJaXuNSxOlbnUXjXRTdCvImdvJcvIzVFtsARccwpbldcyuIcdlrMXDccXSkCLfRQNDrDAIcqoqvq
dbaQGhfLQbEbyhpogAhRzCfpiLABUkBqLBGIokjcvYBtLWlQOiakscoamTYUIXGXaOzRnwGSEvDxUIxY
zuDJdLDwdVXWMrKzgXOEpOBQJQlTVepnrSPBFlk
=========
slbkoXzycbXuWseuyrwyCjVusUcTggHHFdkrBMYIoHNTKWMEBajIBATVQSPDNclUEfgobDylLRVzVuVR
cajBbDKSGuLpbiGQEccsQfJNsUFGncOeMNujfDxUxijXITEnMOSHrXcxvzInvmGNUVNIvFLBomNxcDTq
ovkKsIQhivebpLMYSPFqLqTVkVhMWmorQabHwiMfAkqScUgUfVGmQWTRjVLnAaHiOqzCEbgtizvHoIkj
CNBnYKhbcVmyqOrKvCLORwhMnOskqVTCUzfaUzlJBnLpnKDlwoqxTthnzPSgcLpJWYstpAtRpDLLbFck
PJsJHodnUWWABoYyEYVjiXXziRCJbEUcpwQRFKWEFDrjbvUwVoDStIbIcLUwIRAyMrpRuywOjMABUGtY
csemrzEHPhTclDMoeaVOIzIMWycsvsArRPDoIXAwoblPSttHOfdflOKlQilTnGKMyySsXAcEObKGRTfg
CvMyLHwIVAVUtJdJfHTUApBHsMrLkOgNmXuOviCgTQdQGTVfopaPgxJjbqkSzghUeUOvTgMpJsxOuJHr
sUFNHzjLdgXRVNybtIaurnNMlfILQsERqycXbxuuWbIHWrgAODVsRgGVcBboVyesbuBrKyAcRKSsMFmj
ajgAnwjSzvBUbubjRwOsXcLEiJjMBUVHmlqQhVpCKhNKwtLFsmBcvNqnuGoUibEYVFncFqgSeXniHlxH
yBgJmueKrfSrbnNnqxhmcrfOCNbbgnXsuqthGQuIilFWAdsUgSecEfMtbzFSfwjOylxVTkqdVWclsEmb
GpGJSJaXuNSxOlbnUXjXRTdCvImdvJcvIzVFtsARccwpbldcyuIcdlrMXDccXSkCLfRQNDrDAIcqoqvq
dbaQGhfLQbEbyhpogAhRzCfpiLABUkBqLBGIokjcvYBtLWlQOiakscoamTYUIXGXaOzRnwGSEvDxUIxY
zuDJdLDwdVXWMrKzgXOEpOBQJQlTVepnrSPBFlk
=========
slbkoXzycuWerwCjVUTgHFdBMYINKEaAQSPDfLRGpiJnOxvmqht
slbkoXzycuWerwCjVUTgHFdBMYINKEaAQSPDfLRGpiJnOxvmqht
请按任意键继续. . .