/* Variables retained in the parsed output data set (used as a keep= list). */
%let VARS = fullpath filename extension filetype instrument date time;

/* Date inside a file name. Examples: 2018-05-09 or Jan-29-2017. */
%let DATE_REGEX = ((\d{4})-(\d{2})-(\d{2}))|(\w\w\w-\d\d-\d\d\d\d);

/* Time inside a file name. Example: 10_30_58. */
%let TIME_REGEX = (\d{2})_(\d{2})_(\d{2});

/* Combined date and time. Example: 20180509-103058. */
%let DATETIME_REGEX = (20\d\d\d\d\d\d-\d\d\d\d\d\d);

/* Instrument id. Examples: D1234E5678, or \1024_ where the four digits  */
/* follow a backslash and end with a backslash, blank, underscore or     */
/* hyphen.                                                               */
%let INSTRUMENT_REGEX = (\w\d\d\d\d\w\d\d\d\d)|(\\(\d{4})[\\\s_-]);
/* ----------------------------------------------------------------------
   Build work.files_parsed from work.files.

   Reads   : work.files (uses the variables memname and isFolder).
   Writes  : work.files_parsed, keeping only the variables listed in the
             VARS macro variable.
   Needs   : macro variables VARS, DATE_REGEX, TIME_REGEX, DATETIME_REGEX,
             INSTRUMENT_REGEX, GLBL_DATE_FORMAT and GLBL_TIME_FORMAT.

   For every non-folder entry the step splits the path into file name,
   extension and file type, then extracts a date, a time and an
   instrument id with regular expressions. Records for which a required
   piece cannot be found are reported in the log and dropped.
   ---------------------------------------------------------------------- */
data work.files_parsed (keep=&VARS);
    attrib fullpath   length=$256;
    attrib filename   length=$64;
    attrib extension  length=$8;
    attrib filetype   length=$6;
    attrib instrument length=$10;
    attrib date       length=8 format=&GLBL_DATE_FORMAT;
    attrib time       length=8 format=&GLBL_TIME_FORMAT;

    * ids of the compiled regex patterns, retained across observations ;
    retain date_pattern       0;   * DATE_REGEX ;
    retain time_pattern       0;   * TIME_REGEX ;
    retain datetime_pattern   0;   * DATETIME_REGEX ;
    retain instrument_pattern 0;   * INSTRUMENT_REGEX ;

    set work.files;

    * compile each regex once, on the first observation, and log its id ;
    if (_N_ = 1) then do;
        date_pattern = prxparse("/&DATE_REGEX./");
        put date_pattern=;
        time_pattern = prxparse("/&TIME_REGEX./");
        put time_pattern=;
        datetime_pattern = prxparse("/&DATETIME_REGEX./");
        put datetime_pattern=;
        instrument_pattern = prxparse("/&INSTRUMENT_REGEX./");
        put instrument_pattern=;
    end;

    * ---- entries that are never parsed -------------------------------- ;
    * The i modifier makes the tests case-insensitive, in line with the   ;
    * case-insensitive handling of extension and file name further down.  ;
    if (isFolder = 1) then delete;
    if (find(memname, "sfscli.log", "i") > 0) then delete;             * has no relevant data ;
    if (find(memname, ".xlsx", "i") > 0) then delete;                  * has no relevant data ;
    if (find(memname, "System Information.txt", "i") > 0) then delete; * has no relevant data ;
    if (find(memname, ".pdf", "i") > 0) then delete;                   * not going there... ;

    * ---- split the path into folder part and file name ---------------- ;
    memname  = tranwrd(memname, "/", "\");   * normalise separators to backslash ;
    fullpath = memname;

    * a negative start position makes FIND scan right to left, so this is ;
    * the position of the last backslash                                  ;
    slash = find(memname, "\", -999);
    if (slash > 0) then do;
        filename = substrn(memname, slash + 1);
    end;
    else do;
        put " Record " _N_ " has no slash.";
        delete;
    end;

    memname = filename;   * less to work with going forward... ;

    * ---- split the file name into base name and extension ------------- ;
    period = find(memname, ".", -999);       * position of the last period ;
    if (period > 0) then do;
        extension = upcase(substrn(memname, period + 1));
        * SUBSTRN accepts a zero length, so a name that starts with a     ;
        * period gives a blank base name instead of an invalid argument   ;
        memname = substrn(memname, 1, period - 1);
    end;
    else do;
        put " Record " _N_ " has no extension.";
        delete;
    end;

    * ---- file type: the last three characters of the base name -------- ;
    if (index(filename, "ARC 4") > 0) then do;   * special case...one person ;
        filetype = "SNP";
    end;
    else do;
        base_len = lengthn(memname);
        if (base_len >= 3) then do;
            filetype = upcase(substrn(memname, base_len - 2, 3));   * expect DNP, SNP, EVT, CSV ;
        end;

        * a base name shorter than three characters cannot carry a type   ;
        * code, and a code that starts with a digit is not a type code    ;
        if (base_len < 3
            or (first(filetype) >= "0" and first(filetype) <= "9")) then do;
            put " Record " _N_ " does not appear to have a recognized filetype. " fullpath=;
            delete;
        end;
        else do;
            memname = substrn(memname, 1, base_len - 3);   * strip the type code ;
        end;
    end;

    * must check for date and time, separately, on file name, before      ;
    * attempting to check for date and time, combined, on folder name.    ;

    * ---- date from the file name -------------------------------------- ;
    * CALL PRXSUBSTR returns position 0 when there is no match            ;
    call prxsubstr(date_pattern, memname, match_pos, match_len);
    if (match_pos > 0) then do;
        if (match_len = 10) then do;        * e.g. 2018-05-09 ;
            date = input(substr(memname, match_pos, match_len), yymmdd10.);
        end;
        else if (match_len = 11) then do;   * e.g. Jan-29-2017 ;
            date = input(substr(memname, match_pos, match_len), anydtdte11.);
        end;
    end;
    else do;
        date = . ;
    end;

    * ---- time from the file name -------------------------------------- ;
    call prxsubstr(time_pattern, memname, match_pos, match_len);
    if (match_pos > 0) then do;
        time = input(substr(memname, match_pos, match_len), time8.);
    end;
    else do;
        time = . ;
    end;

    * ---- fall back to a combined stamp anywhere in the full path ------- ;
    * only used when the file name supplied neither a date nor a time     ;
    if (date = . and time = .) then do;
        call prxsubstr(datetime_pattern, fullpath, match_pos, match_len);
        if (match_pos > 0) then do;
            date = input(substr(fullpath, match_pos, 8), yymmdd8.);       * the 8 digits before the hyphen ;
            time = input(substr(fullpath, match_pos + 9, 6), hhmmss6.);   * the 6 digits after the hyphen ;
        end;
    end;

    * ---- date and time are mandatory, except for the event log -------- ;
    if (upcase(filename) ne upcase("MPU_EVENT_LOG-EVT.dat")) then do;
        if (date = .) then do;
            put " Record " _N_ " has no date. " fullpath=;
            delete;
        end;
        else if (time = .) then do;
            put " Record " _N_ " has no time. " fullpath=;
            delete;
        end;
    end;

    * ---- instrument id from the full path ----------------------------- ;
    call prxsubstr(instrument_pattern, fullpath, match_pos, match_len);
    if (match_pos > 0) then do;
        if (match_len = 6) then do;
            * six-character form: remove leading slash, trailing character ;
            instrument = substr(fullpath, match_pos + 1, match_len - 2);
        end;
        else do;
            instrument = substr(fullpath, match_pos, match_len);
        end;
    end;
    else do;
        put " Record " _N_ " has no instrument. " fullpath=;
        delete;
    end;
run;