Thursday, August 23, 2018
SAS: Time each task
/* Opening banner: log the wall-clock time and date just before the task. */
%put >>>>>--------------------------------------------------------------------------------------------- ;
%put >>>>> Begin PROC FREQ at %sysfunc(time(),timeampm.) on %sysfunc(date(),worddate.).;
%put >>>>>--------------------------------------------------------------------------------------------- ;

/* The task being timed. RUN is the step boundary, so the step finishes */
/* before the closing banner below is written.                          */
proc freq data = work.event_codes;
    tables event_code;
run;

/* Closing banner: log the wall-clock time and date after the task. */
%put >>>>>--------------------------------------------------------------------------------------------- ;
%put >>>>> Job complete at %sysfunc(time(),timeampm.) on %sysfunc(date(),worddate.).;
%put >>>>>--------------------------------------------------------------------------------------------- ;
Friday, June 29, 2018
SAS: Blank when zero
SAS: Blank when zero
* blank when zero ;
* numeric format that prints the value 0 as a blank ;
proc format;
    value bwz
        0 = ' '
    ;
run; * step boundary so the format is created immediately ;
Sample usage
* attach bwz at width 7 to both count variables so zeros print as blank ;
* (a FORMAT statement must sit inside a DATA or PROC step) ;
format
units_sold
units_returned
bwz7. ;
* blank when zero ;
* numeric format that prints the value 0 as a blank ;
proc format;
    value bwz
        0 = ' '
    ;
run; * step boundary so the format is created immediately ;
Sample usage
* attach bwz at width 7 to both count variables so zeros print as blank ;
* (a FORMAT statement must sit inside a DATA or PROC step) ;
format
units_sold
units_returned
bwz7. ;
Tuesday, June 26, 2018
SAS: Remove unicode characters from string field
Remove unicode characters from string field
* remove unicode characters. For example grave accent + H is Omega symbol. ;
* Source: https://communities.sas.com/t5/SAS-Procedures/SAS8-How-to-remove-special-characters-from-string/td-p/59186 ;
* keyboard is the list of characters to KEEP. It is passed to COMPRESS with ;
* modifiers k (keep listed characters), i (ignore case) and s (also keep white space). ;
* The doubled double-quote below stores one literal double-quote character, ;
* without it every double quote in the data would be stripped as well. ;
keyboard = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890`~!@#$%^&*()-_=+\|[]{};:'"",.<>?/ ";
stringField = compress(stringField, keyboard, "kis");
Wednesday, May 9, 2018
SAS: My first SAS program using regular expressions
SAS: My first SAS program using regular expressions
/* Variables written to the parsed output data set (used in KEEP=). */
%let VARS = fullpath filename extension filetype instrument date time;

/* Date inside a file name.  Example: 2018-05-09 or Jan-29-2017 */
%let DATE_REGEX = ((\d{4})-(\d{2})-(\d{2}))|(\w\w\w-\d\d-\d\d\d\d);

/* Time inside a file name.  Example: 10_30_58 */
%let TIME_REGEX = (\d{2})_(\d{2})_(\d{2});

/* Combined date and time stamp.  Example: 20180509-103058 */
%let DATETIME_REGEX = (20\d\d\d\d\d\d-\d\d\d\d\d\d);

/* Instrument id.  Example: D1234E5678, or \1024_ where the four digits   */
/* follow a backslash and end with slash, blank, underscore or hyphen.    */
%let INSTRUMENT_REGEX = (\w\d\d\d\d\w\d\d\d\d)|(\\(\d{4})[\\\s_-]);
/* Parse every path in work.files into fullpath, filename, extension,        */
/* filetype, instrument, date and time.  Rows that are folders, known        */
/* irrelevant files, or that lack a required part are logged and deleted.    */
/* Reads memname and isFolder from work.files.  Relies on the macro          */
/* variables VARS, DATE_REGEX, TIME_REGEX, DATETIME_REGEX, INSTRUMENT_REGEX, */
/* GLBL_DATE_FORMAT and GLBL_TIME_FORMAT being defined before this step.     */
data work.files_parsed (keep=&VARS);
    attrib fullpath   length=$256;
    attrib filename   length=$64;
    attrib extension  length=$8;
    attrib filetype   length=$6;
    attrib instrument length=$10;
    attrib date       length=8 format=&GLBL_DATE_FORMAT;
    attrib time       length=8 format=&GLBL_TIME_FORMAT;

    * ids of the compiled regex patterns, compiled once and retained ;
    retain date_pattern       0;
    retain time_pattern       0;
    retain datetime_pattern   0;
    retain instrument_pattern 0;

    set work.files;

    if (_N_ = 1) then do;
        * compile each regex once and echo its id to the log ;
        date_pattern = prxparse("/&DATE_REGEX./");
        put date_pattern=;
        time_pattern = prxparse("/&TIME_REGEX./");
        put time_pattern=;
        datetime_pattern = prxparse("/&DATETIME_REGEX./");
        put datetime_pattern=;
        instrument_pattern = prxparse("/&INSTRUMENT_REGEX./");
        put instrument_pattern=;
    end;

    * skip folders and files that carry nothing to parse ;
    if (isFolder = 1) then delete;
    if (index(memname, "sfscli.log") > 0) then delete;             * has no relevant data ;
    if (index(memname, ".xlsx") > 0) then delete;                  * has no relevant data ;
    if (index(memname, "System Information.txt") > 0) then delete; * has no relevant data ;
    if (index(memname, ".pdf") > 0) then delete;                   * not going there... ;

    * normalise separators, then keep the untouched path ;
    memname = tranwrd(memname, "/", "\");
    fullpath = memname;

    * a negative start makes FIND scan right to left, so this is the last backslash ;
    slash = find(memname, "\", -999);
    if (slash > 0) then do;
        filename = substr(memname, slash+1);
    end;
    else do;
        put " Record " _N_ " has no slash.";
        delete;
    end;

    memname = filename; * less to work with going forward... ;

    * split off the extension at the last period ;
    period = find(memname, ".", -999);
    if (period > 0) then do;
        extension = upcase(substr(memname, period+1));
        memname = substr(memname, 1, period - 1);
    end;
    else do;
        put " Record " _N_ " has no extension.";
        delete;
    end;

    if (index(filename, "ARC 4") > 0) then do; * special case...one person ;
        filetype = "SNP";
    end;
    else do;
        * the filetype is the last three characters of the base name ;
        * a shorter base name cannot hold one, so reject it instead of ;
        * handing SUBSTR an invalid start position ;
        name_len = length(memname);
        if (name_len < 3) then do;
            put " Record " _N_ " does not appear to have a recognized filetype. " fullpath=;
            delete;
        end;
        filetype = upcase(substr(memname, name_len - 2)); * expect DNP, SNP, EVT, CSV ;
        if (first(filetype) >= "0" and first(filetype) <= "9") then do;
            put " Record " _N_ " does not appear to have a recognized filetype. " fullpath=;
            delete;
        end;
        else if (name_len > 3) then do;
            memname = substr(memname, 1, name_len - 3);
        end;
        else do;
            * base name was exactly the filetype, nothing is left in front of it ;
            memname = " ";
        end;
    end;

    * must check for date and time, separately, on file name, before ;
    * attempting to check for date and time, combined, on folder name. ;

    * PRXSUBSTR returns position 0 when there is no match, so one call is enough ;
    call prxsubstr(date_pattern, memname, match_pos, match_len);
    if (match_pos > 0) then do;
        if (match_len = 10) then do;
            * numeric form such as 2018-05-09 ;
            date = input(substr(memname, match_pos, match_len), yymmdd10.);
        end;
        else if (match_len = 11) then do;
            * month name form such as Jan-29-2017 ;
            date = input(substr(memname, match_pos, match_len), anydtdte11.);
        end;
    end;
    else do;
        date = . ;
    end;

    call prxsubstr(time_pattern, memname, match_pos, match_len);
    if (match_pos > 0) then do;
        time = input(substr(memname, match_pos, match_len), time8.);
    end;
    else do;
        time = . ;
    end;

    * fall back to a combined stamp anywhere in the path, 8 date digits, ;
    * one separator, then 6 time digits ;
    if (date = . and time = .) then do;
        call prxsubstr(datetime_pattern, fullpath, match_pos, match_len);
        if (match_pos > 0) then do;
            date = input(substr(fullpath, match_pos, 8), yymmdd8.);
            time = input(substr(fullpath, match_pos + 9, 6), hhmmss6.);
        end;
    end;

    if (upcase(filename) = upcase("MPU_EVENT_LOG-EVT.dat")) then do;
        * date and time are not required for these files ;
    end;
    else do;
        if (date = .) then do;
            put " Record " _N_ " has no date. " fullpath=;
            delete;
        end;
        else if (time = .) then do;
            put " Record " _N_ " has no time. " fullpath=;
            delete;
        end;
    end;

    call prxsubstr(instrument_pattern, fullpath, match_pos, match_len);
    if (match_pos > 0) then do;
        if (match_len = 6) then do;
            * four digit form, remove leading slash and trailing character ;
            instrument = substr(fullpath, match_pos + 1, match_len - 2);
        end;
        else do;
            instrument = substr(fullpath, match_pos, match_len);
        end;
    end;
    else do;
        put " Record " _N_ " has no instrument. " fullpath=;
        delete;
    end;
run;
Monday, May 7, 2018
R: Scope of variables within user defined functions
R: Scope of variables within user defined functions
Source: https://stackoverflow.com/questions/10904124/global-and-local-variables-in-r
Variables declared
inside a function are local to that function. For instance:
# `<-` inside a function creates a variable local to that call.
foo <- function() {
  bar <- 1
}
foo()
# bar was never created in the calling environment, so this lookup fails.
bar
gives the following error: Error:
object 'bar' not found.
# `<<-` assigns outside the function: with no existing `bar` in an
# enclosing environment, the variable is created in the global environment.
foo <- function() {
  bar <<- 1
}
foo()
bar
In this case bar is accessible from outside the function.
However, unlike C, C++
or many other languages, brackets do not determine the scope of variables. For
instance, in the following code snippet:
# Braces do not open a new scope in R: y is created in the enclosing environment.
# `else` must follow the closing brace on the same line, otherwise R at top
# level treats the `if` as complete and fails to parse the `else`.
if (x > 10) {
  y <- 0
} else {
  y <- 1
}
y remains accessible after the if-else statement.
PowerShell: Enable execution of PS scripts on the local machine
PowerShell: Enable execution of PS scripts on the local machine
Open command window with Run As Administrator
Type powershell and press Enter.
Get-ExecutionPolicy -List
Set-ExecutionPolicy Bypass
Get-ExecutionPolicy -List (you will see LocalMachine is now Bypass)
exit
(Set-ExecutionPolicy also takes a -Scope parameter to target a scope other than LocalMachine, for example -Scope CurrentUser.)
R: ts function gotchas...
R: ts function gotchas...
> # It appears R is replicating (recycling) data when creating a time series
> # where the requested date range exceeds the available data.
>
> # The moral of the story is: make sure there are no gaps in the data
> # prior to creating the time series!!!
>
> df = data.frame()
>
> x = 10; yr = 2016; mo = 01;
df = rbind(df, data.frame(x, yr, mo))
> x = 12; yr = 2016; mo = 02;
df = rbind(df, data.frame(x, yr, mo))
> x = 15; yr = 2016; mo = 03;
df = rbind(df, data.frame(x, yr, mo))
> x = 19; yr = 2016; mo = 04;
df = rbind(df, data.frame(x, yr, mo))
>
> x = 11; yr = 2016; mo = 05;
df = rbind(df, data.frame(x, yr, mo))
> x = 13; yr = 2016; mo = 06;
df = rbind(df, data.frame(x, yr, mo))
> x = 16; yr = 2016; mo = 07;
df = rbind(df, data.frame(x, yr, mo))
> x = 20; yr = 2016; mo = 08;
df = rbind(df, data.frame(x, yr, mo))
>
> x = 12; yr = 2016; mo = 09;
df = rbind(df, data.frame(x, yr, mo))
> x = 14; yr = 2016; mo = 10;
df = rbind(df, data.frame(x, yr, mo))
> x = 17; yr = 2016; mo = 11;
df = rbind(df, data.frame(x, yr, mo))
> x = 21; yr = 2016; mo = 12;
df = rbind(df, data.frame(x, yr, mo))
>
> x = 13; yr = 2017; mo = 01;
df = rbind(df, data.frame(x, yr, mo))
> x = 15; yr = 2017; mo = 02;
df = rbind(df, data.frame(x, yr, mo))
> x = 18; yr = 2017; mo = 03;
df = rbind(df, data.frame(x, yr, mo))
> x = 22; yr = 2017; mo = 04;
df = rbind(df, data.frame(x, yr, mo))
>
> print(df)
x
yr mo
1 10 2016
1
2 12 2016
2
3 15 2016
3
4 19 2016
4
5 11 2016
5
6 13 2016
6
7 16 2016
7
8 20 2016
8
9 12 2016
9
10 14 2016 10
11 17 2016 11
12 21 2016 12
13 13 2017 1
14 15 2017 2
15 18 2017 3
16 22 2017 4
>
> # get first and last month
and year
> min_yr = min(df$yr)
> min_mo= min(subset(df,
yr==min_yr)$mo)
> max_yr = max(df$yr)
> max_mo= max(subset(df,
yr==max_yr)$mo)
>
> print(paste0("Have
values from ", min_mo, "/", min_yr, " thru ", max_mo,
"/", max_yr))
[1] "Have values from
1/2016 thru 4/2017"
>
> ts = ts(df$x,
start=c(min_yr, min_mo), end=c(max_yr, max_mo), frequency=12)
> print(ts)
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2016  10  12  15  19  11  13  16  20  12  14  17  21
2017  13  15  18  22
>
> max_mo = 8 # data does not really go out that far
> ts2 = ts(df$x,
start=c(min_yr, min_mo), end=c(max_yr, max_mo), frequency=12)
> print(ts2)
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2016  10  12  15  19  11  13  16  20  12  14  17  21
2017  13  15  18  22  10  12  15  19
> # N O T E: I do not have data for May-Aug 2017, so ts() recycles the series and fills those months with the Jan-Apr 2016 values ! ! !
>
> # drop one row, Oct 2016
> df = subset(df, !(yr ==
2016 & mo == 10))
> print(df)
x
yr mo
1 10 2016
1
2 12 2016
2
3 15 2016
3
4 19 2016
4
5 11 2016
5
6 13 2016
6
7 16 2016
7
8 20 2016
8
9 12 2016
9
11 17 2016 11
12 21 2016 12
13 13 2017 1
14 15 2017 2
15 18 2017 3
16 22 2017 4
> max_mo = 4 # undo prior test
> ts3 = ts(df$x,
start=c(min_yr, min_mo), end=c(max_yr, max_mo), frequency=12)
> print(ts3)
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2016  10  12  15  19  11  13  16  20  12  17  21  13
2017  15  18  22  10
> # N O T E: This shows df dates are ignored when creating time series ! ! !
Subscribe to:
Posts (Atom)