This post was kindly contributed by SASTechies - go there to comment and to read the full post. |
Folks, I was going thru few visualizations on Tableau’s website today and I came across this visualization Exploring the SSA Baby Names Dataset by one of the acclaimed Tableau professional….It made me thinking to explore of How many of those SSA baby names are of Indian American (Desi) descent…
I found a website online that had a list of popular Indian baby names…I read the data into SAS and made a Tableau Story out of it… Please take a few moments to play with this interesting viz…I hope you like it….
Here’s the SAS code that went into the prep of the data…
/*Read Indian Baby Names by parsing the http URL */
libnamesharad “C:UsersSharadDesktopnamesbystate”;
%macroloop(type);
proc sql; drop table name; quit;
%doi=1 %to 60;
filename foo url
“http://www.modernindianbabynames.com/modern_baby_name/starting_with/ANY/MF/Sikh/1560/&i.”;
data _null;
retain start recind recst recend hier;
length SN Name Meaning Gender Origin $ 100;
retain SN Name Meaning Gender Origin;
infile foo length=len;
input record $varying200.len;
put record $varying200.len;
if index(record,‘) then start=1;
if index(record,‘
‘) then start=0;
if index(record,‘) then delete;
if index(record,‘
‘
) then do; recvalst=1; hier+1;; end;
‘
) then do; recvalst=1; hier+1;; end;
if index(record,‘
‘) then do; recvalend=1; delete; end;
if index(record,‘ ‘ ) then do; recst=1; hier=0;delete; end;
if index(record,‘
‘) then do; recst=0; hier=0;; end;
if hier=1then do; record=tranwrd(record,‘
‘
,”); SN=strip(record); end;
‘
,”); SN=strip(record); end;
else if hier=2then do; record=tranwrd(record,‘
‘
,”); Name=strip(record); end;
‘
,”); Name=strip(record); end;
else if hier=3then do; record=tranwrd(record,‘
‘
,”); Meaning=strip(record); end;
‘
,”); Meaning=strip(record); end;
else if hier=4then do; record=tranwrd(record,‘
‘
,”); Gender=strip(record); end;
‘
,”); Gender=strip(record); end;
else if hier=5then do; record=tranwrd(record,‘
‘
,”); Origin=strip(record); end;
‘
,”); Origin=strip(record); end;
record=tranwrd(record,‘
‘
,”);
‘
,”);
record=tranwrd(record,‘
‘,”);
record=strip(record);
if index(record,‘
‘) and start then do; recend=1; hier=0; output; end;
else delete;
keep SN Name Meaning Gender Origin;
run;
OPTION SPOOL;
proc append data=_null base=sharad.&type force; run;
%end;
%mendloop;
%loop(Hindi);
/*
Make a list of Indian Names that definetly sound Indian or Closely Indian
Y – Yes
P – Indian Possibility
*/
dataSharad.Def_IndiaNames;
infilecards4 dlm=’09’xmissover;
lengthName $ 100 IndianorNot $ 1;
inputName IndianorNot;
Name=strip(propcase(Name));
cards;
Name Indian
Tina P
Tanya P
Maya P
Trisha Y
Nadia P
Amir P
Aisha P
Tanisha P
Chandra P
Chaya P
Rohan Y
—-and 1000’s of other records—
;
run;
/*
Join all available Indian Names
*/
dataSharad.ALLNames;
setSharad.telugu
sharad.bengali sharad.hindi sharad.sikh;
Name=translate(Name,”,“‘”);
ifcompress(Name)=” thendelete;
dropSNO SN;
run;
/*
Remove Dups
*/
procsort data=Sharad.ALLNames noduprecs; by Name; run;
/*
Re-purpose the data a bit
*/
dataSharad.IndianNames(rename=(dMeaning=Meaning dGender=IGender dOrigin=Origin));
lengthdMeaning $ 100 dGender $15dOrigin $ 100;
retaindMeaning dGender dOrigin;
setSharad.ALLNames;
byName;
if first.name then
do;
dMeaning=”;
dOrigin=”;
dGender=”;
end;
ifindex(strip(dMeaning),strip(Meaning)) eq 0then dMeaning=catx(‘ OR ‘,strip(dMeaning),strip(Meaning));
ifindex(strip(dOrigin),strip(Origin)) eq 0then dOrigin=catx(‘ ,’,strip(dOrigin),strip(Origin));;
ifindex(strip(dGender),strip(Gender)) eq 0then dGender=catx(‘ OR ‘,strip(dGender),strip(Gender));;
ifdGender in (“Boy OR Girl”,“Girl OR Boy”) thendGender=“Boy OR Girl”;
dGender=strip(dGender);
if last.name thenoutput;
keepName dMeaning dGender dOrigin;
run;
/*
Read US Gov SSA Baby Names data fields
*/
filenameallst “C:UsersSharadDesktopnamesbystateallallstates.txt”;
dataSharad.USNames;
infileallst dlm=‘,’dsd missoverfirstobs=2;
lengthState $ 2 Gender $1Year $4 Name $ 50;
inputState Gender Year Name Occurences;
run;
/*
Merge US Gov SSA Baby Names data with Indian Names Data
*/
procsql;
createtable sharad.IndNames as
selectA.*,IGender,Meaning,Origin
fromSharad.USNames A
leftjoin Sharad.IndianNames B
onA.name=B.name
orderby A.name;
quit;
/*
Merge US Gov SSA Baby Names data with Hand picked Indian Data
*/
procsql;
createtable sharad.DefinitelyIndian as
selectA.*,
case
whenA.name=B.name and IndianorNot=‘Y’then ‘Indian Name’
whenA.name=B.name and IndianorNot=‘P’then ‘Likely an Indian Name’
else‘Non-Indian Name’
endas IndianDescent length=10
fromsharad.IndNames A
leftjoin Sharad.Def_IndiaNames B
onA.name=B.name
;
quit;
This post was kindly contributed by SASTechies - go there to comment and to read the full post. |