Tensor in SAS

This post was kindly contributed by SAS Programming for Data Mining Applications - go there to comment and to read the full post.


Tensor, a math concept for high order array, is a very useful tool in modern data mining applications. HOSVD, the counter part of SVD in higher order array, is at the heart of modern applications, such as face recognition and clustering, segmentation of multi-dimensional transaction data, etc.

【”Matrix Method in Data Mining and Pattern Recognition” by Lars Eldén】

But implementation of math operations designed for Tensor is not easy, not to mention doing that in SAS. Due to the design nature of SAS data set, everything has to be done in a 1-D or 2-D matrix fashion. Luckily, Tensor operations have their low dimension matrix counterpart, only that matrix subscriptions need to be calculated carefully.

I spent recent couple of days to implement some featured Tensor operations, notably Tensor unfolding (Tensor to Matrix) and HOSVD for my research and work. A prototype code for 3-mode Tensor unfolding and HOSVD SAS code is shown below. Data is from Lathauwer et al., “A MULTILINEAR SINGULAR VALUE DECOMPOSITION”, SIAM J. MATRIX ANAL. APPL. vol.21, No.4



data A1;
     input X1-X3;
datalines;
 0.9073  0.8924  2.1488
 0.7158 -0.4898  0.3054
-0.3698  2.4288  2.3753
 1.7842  1.7753  4.2495
 1.6970 -1.5077  0.3207
 0.0151  4.0337  4.7146
 2.1236 -0.6631  1.8260
-0.0740  1.9103  2.1335
 1.4420 -1.7495 -0.2716
;
run;

filename Acat  catalog 'work.tensor.A2.CATAMS';

data _null_;
     file Acat;
     mode='1;';
     dimension='3,3,3;';
     put mode dimension;
run;

%let _dims=2 3 4;
%let _totalmode=3;
%let _mode0=1;
%let _modex=2;
%let _dimension2=6;
%let _dimension1=4;

data _null_;
      infile A2Cat;
      input;  
      _index=trim(scan(_infile_, 1, ';')); put _index=;
      _dimension=trim(scan(_infile_, 2, ';')); put _dimension=;       
      d2=count((_dimension), ',');  put d2=;
      call symput('_totalmode', d2+1);   
      call symput('_dims', translate(_dimension, ' ', ','));
      _dim0=scan(_dimension, &_mode0, ',');
      _dimx=scan(_dimension, &_modex, ',');
      call symput('_dim0', _dim0);
      call symput('_dimx', _dimx);
      call symput('_dim0x', compress(max(_dim0, _dimx)));
      dim2=1; dim1=1;
      do j=1 to d2+1;      
         if j=&_modex then dim1=dim1*scan(_dimension, j, ',');
         else dim2=dim2*scan(_dimension, j, ',');         
      end;
      put dim1= dim2= dim20=;
      call symput('_dimension1', compress(dim1));
      call symput('_dimension2', compress(dim2));   
run;
%put &_totalmode  &_dim0x;
%put &_dimension1  &_dimension2;
%put &_dims;

data  A&_modex;
      array _dim{&_totalmode} _temporary_ (&_dims);
      array _v{&_totalmode} _temporary_;
      array _datavec{&_dim0x} X1-X&_dim0x;
      array _X{&_dimension2, &_dimension1} _temporary_;
      set A&_mode0  end=eof;
      m1=mod(&_mode0+1, &_totalmode); if m1=0 then m1=&_totalmode;
      m2=mod(&_mode0+2, &_totalmode); if m2=0 then m2=&_totalmode;
      _v[m2]=mod(_n_, _dim[m2]); if _v[m2]=0 then _v[m2]=_dim[m2];
      _v[m1]=floor((_n_-_v[m2])/_dim[m2])+1;
      do j=1 to &_dim0;
         _v[&_mode0]=j;
         m1=mod(&_modex+1, &_totalmode); if m1=0 then m1=&_totalmode;
         m2=mod(&_modex+2, &_totalmode); if m2=0 then m2=&_totalmode;  
         _m=_v[&_modex];
         _n=(_v[m1]-1)*_dim[m2]+_v[m2];
         _X[_n, _m]=_datavec[j];
      end;
      if eof then do;
         do j=1 to &_dimension2;
            do k=1 to &_dimension1;
               _datavec[k]=_X[j, k];
            end;
            keep X1-X&_dim0x;
            output;
         end;
   end; 
run;

filename Acat  catalog "work.tensor.A&_modex..CATAMS";
data _null_;
     file Acat;
     mode="&_modex;";
     dimension="3,3,3;";
     put mode dimension;
run;

ods select none;
proc princomp data=A1  outstat=stat_1 noint  cov;
     var X1-X3;
run;
proc princomp data=A2  outstat=stat_2 noint  cov;
     var X1-X3;
run;
proc princomp data=A3  outstat=stat_3 noint  cov;
     var X1-X3;
run;
ods select all;

/***************************************
In order to obtain Tensor core S, we can 
do something like this:
S_(1)=t(U1)%*%A_(1)%*%(kronecker(U2, U3)
where S_(1) is 1-mode matrix of S and A_(1) 
is 1-mode matrix of A
****************************************/

%macro kroneck(mat_A, mat_B, mat_kronecker);
%local dsid row_B col_B  col_A;

%let dsid=%sysfunc(open(&mat_B));
%let row_B=%sysfunc(attrn(&dsid, nobs));
%let col_B=%sysfunc(attrn(&dsid, nvars));
%let dsid=%sysfunc(close(&dsid));
%let dsid=%sysfunc(open(&mat_A));
%let col_A=%sysfunc(attrn(&dsid, nvars));
%let dsid=%sysfunc(close(&dsid));

data _null_;
     col_K=&col_A*&col_B;
     call symput('col_K', compress(col_K));
run;
%put &col_A  &col_B  &col_K &row_B;

proc sql noprint;
     select name into :vars_A separated by ' '
  from   sashelp.vcolumn
  where  libname='WORK' and memname=%upcase("&mat_A")
  ;
     select name into :vars_B separated by ' '
  from   sashelp.vcolumn
  where  libname='WORK' and memname=%upcase("&mat_B")
  ;
quit;

data &mat_kronecker;
     array _K{&col_K} K1-K&col_K;
  array _A{&col_A} &vars_A;
  array _B{&col_B} &vars_B;
  array _Atmp{&col_A} _temporary_;
     array _Btmp{&col_B} _temporary_;

  do n1=1 to na;  
     set &mat_A  nobs=na point=n1;
  do j=1 to dim(_A);
           _Atmp[j]=_A[j];
  end;
     do n=1 to nb;
           set &mat_B nobs=nb point=n;
     do j=1 to dim(_B);
              _Btmp[j]=_B[j];
     end;
     do j=1 to &col_K;
        _ib=mod(j, &col_B); if _ib=0 then _ib=&col_B;
        _ia=floor((j-_ib)/&col_B)+1;
     *put _ia= _ib=  j= _A[_ia]= _B[_ib]=;
              _K[j]=_Atmp[_ia]*_Btmp[_ib];
     end;
     keep K1-K&col_K;
     output;
     end;
  end;
  stop;
run;

%mend kronecker;

options mprint  mlogic;
%kroneck(B, A, C);

data U1t(keep=X:);
     set stat_1;
     where _type_='USCORE';
run;
data U2t(keep=X:);
     set stat_2;
     where _type_='USCORE';
run;
data U3t(keep=X:);
     set stat_3;
     where _type_='USCORE';
run;

options nomprint  nomlogic;
%kroneck(U2t, U3t, U23t);

data A1score;
     set A1;
     retain _TYPE_ 'PARMS'; 
     _NAME_=compress('K'||_n_);
run;

proc score data=u1t  score=A1score out=u1A1(keep=K:) type=parms;
     var X1-X3;
run;
data u1A1;
     set u1A1;
     retain _TYPE_ 'PARMS'; 
     _NAME_=compress('X'||_n_);
run;

proc score data=u23t  score=u1A1  out=S1(keep=X:) type=parms;
     var K1-K9;
run;

Reference:
Matrix Methods in Data Mining and Pattern Recognition by Lars Eldén

Matrix Methods in Data Mining and Pattern Recognition (Fundamentals of Algorithms)

This post was kindly contributed by SAS Programming for Data Mining Applications - go there to comment and to read the full post.