|
代码:
- % This script generates the frequent itemsets.
- % Results are collected in TotalSheet, one cell per itemset size.
- clear all;
- clc;
- TotalSheet=[];
- % Stop in the debugger whenever a run-time error occurs.
- dbstop if error;
- % Keep the distinct values found in the second column of data.csv.
- brand_name=csvread('./data/data.csv');
- brand_name=brand_name(:,2);
- brand_name=unique(brand_name);
- thconf=0.6;% minimum confidence threshold (not referenced again in the code shown here)
- % NOTE(review): the MAT-file is expected to define a variable named "table" - confirm.
- load('./data/table');
- X=table;
- % m = number of rows, n = number of columns of the loaded matrix.
- % NOTE(review): presumably rows are transactions and columns are items - confirm.
- [m,n]=size(X);
- D=X;
- % Minimum support count: 10% of the row count, rounded up.
- th=ceil(0.1*m);
- %%
- % Find the frequent 1-itemsets.
- % C1 pairs every column index of D with that column's total:
- % column 1 = item index, column 2 = support count.
- % The explicit dimension argument keeps one total per column even when
- % D has a single row (plain sum(D) would collapse that row to a scalar).
- C1=[(1:n)' sum(D,1)'];
- L1=C1;
- support=L1(:,2);
- % Discard the items whose support count is below the minimum th.
- L1(support<th,:)=[];
- TotalSheet{1}=L1;
- %%
- % Generate the frequent 2-itemsets.
- % Candidates are all unordered pairs of frequent single items.
- % nchoosek interprets a scalar first argument as "n choose k", so the
- % pairs are only enumerated when at least two items are available.
- if size(L1,1)>=2
-     C2=nchoosek(L1(:,1)',2);
- else
-     C2=zeros(0,2);
- end
- num_cand=size(C2,1);
- % Column 1 holds the itemset, column 2 its support count.
- temp=cell(num_cand,2);
- for i=1:num_cand
-     items=C2(i,:);
-     temp{i,1}=items;
-     % Support count = number of rows of D that are nonzero in every
-     % column of the pair. No variable named "sum" is created, so the
-     % built-in sum stays usable when sections are re-run.
-     temp{i,2}=nnz(all(D(:,items),2));
- end
- C2=temp;
- L2=C2;
- support=cell2mat(L2(:,2));
- % Drop the pairs whose support count is below the minimum th.
- index=find(support<th);
- L2(index,:)=[];
- TotalSheet{2}=L2;
- %%
- % Generate the frequent k-itemsets (k>=3), one level per pass.
- % Each pass joins the frequent (k-1)-itemsets stored in L2, counts the
- % support of the resulting candidates, and writes the survivors back to
- % L2 so that the next pass can build on them.
- max_itemset_size=6;   % largest itemset size that is searched
- for k=3:max_itemset_size
-     if isempty(L2)
-         break;
-     end
-     % One frequent (k-1)-itemset per row.
-     prev_sets=cell2mat(L2(:,1));
-     [item_num,prev_len]=size(prev_sets);
-     if item_num<2
-         break;
-     end
-     % Join step: try every unordered pair of (k-1)-itemsets.
-     comb_index=nchoosek(1:item_num,2);
-     cand=zeros(size(comb_index,1),prev_len+1);
-     j=0;
-     for i=1:size(comb_index,1)
-         comb_temp=union(prev_sets(comb_index(i,1),:),prev_sets(comb_index(i,2),:));
-         % The itemsets are row vectors, so their length is the element
-         % count, not the row count. A pair is joinable only when the
-         % union has exactly one item more than its parents.
-         if numel(comb_temp)~=prev_len+1
-             continue;
-         end
-         % Prune step: every (k-1)-subset of the candidate must itself
-         % be one of the frequent (k-1)-itemsets.
-         sub_comb=nchoosek(comb_temp,prev_len);
-         if ~all(ismember(sub_comb,prev_sets,'rows'))
-             continue;
-         end
-         j=j+1;
-         cand(j,:)=comb_temp;
-     end
-     % Different pairs can yield the same candidate; keep each one once.
-     C3=unique(cand(1:j,:),'rows');
-     if isempty(C3)
-         break;
-     end
-     % Column 1 holds the itemset, column 2 its support count.
-     num_cand=size(C3,1);
-     L3=cell(num_cand,2);
-     for i=1:num_cand
-         L3{i,1}=C3(i,:);
-         % Number of rows of D that are nonzero in every candidate column.
-         L3{i,2}=nnz(all(D(:,C3(i,:)),2));
-     end
-     support=cell2mat(L3(:,2));
-     % Drop the candidates whose support count is below the minimum th.
-     index=find(support<th);
-     L3(index,:)=[];
-     TotalSheet{k}=L3;
-     L2=L3;
-     % Intentionally unterminated: echoes the finished level as progress.
-     k
- end
复制代码 |
|