#!/usr/bin/env Rscript # # QartodCalc.R - Calculates QARTOD flags for CSV files # # Notes: vector/array indexes in R start at 1 # called by: # qartod.sh # command line syntax: # ./QartodCalc.R STATION DATA_TYPE INPUT_FILE OUTPUT_FILE [append] [allflags] # # where STATION = Station name (eg: "erie-cmt") # DATA_TYPE = Data type to process (eg: "atemp_csi") # INPUT_FILE = Input CSV file to process (eg: "~/obs/tmp/erie-cmt/csvdata/atemp_csi.csv") # OUTPUT_FILE = Output CSV file (eg: "/data/obs/now/erie-cmt/csv_format/atemp_csi.csv") # append = Option to only append new data # allflags = Option to also output all flags: # (known, range, climate, spike, flat line, rate of change) # depends upon: # /home/realtime/obs/now/QartodTable.R # /home/realtime/obs/now/QartodKnownFails.R # # /home/realtime/obs/now/QartodAirTempTable.csv # /home/realtime/obs/now/QartodBottomTempTable.csv # /home/realtime/obs/now/QartodProfileTempTable.csv # /home/realtime/obs/now/QartodSurfaceTempTable.csv # /home/realtime/obs/now/QartodParBuoyTable.csv # /home/realtime/obs/now/QartodSolarBuoyTable.csv # ********* # * Title * # ********* cat( "[QartodCalc.R started at ", as.character( Sys.time() ), "]\n\n", sep="" ) progstarttime = as.integer( Sys.time() ) # ************** # * Parameters * # ************** # Load tables RootDir = "/home/realtime/obs/now/" source( paste( RootDir, "QartodTable.R", sep="" ) ) source( paste( RootDir, "QartodKnownFails.R", sep="" ) ) ClimateAirTempTable = paste( RootDir, "QartodAirTempTable.csv", sep="" ) ClimateBottomTempTable = paste( RootDir, "QartodBottomTempTable.csv", sep="" ) ClimateProfileTempTable = paste( RootDir, "QartodProfileTempTable.csv", sep="" ) ClimateSurfaceTempTable = paste( RootDir, "QartodSurfaceTempTable.csv", sep="" ) ClimateParBuoyTable = paste( RootDir, "QartodParBuoyTable.csv", sep="" ) ClimateSolarBuoyTable = paste( RootDir, "QartodSolarBuoyTable.csv", sep="" ) # *********** # * Program * # *********** # Options options( max.print = 200 ) # Get command line arguments args = commandArgs( trailingOnly = TRUE ) if ( is.na( args[ 4 ] ) ) { cat( "MISSING PARAMETERS", "", "Syntax: ./QartodCalc.R STATION DATA_TYPE INPUT_FILE OUTPUT_FILE [append] [allflags]", "", "where STATION = Station name (eg: erie-cmt)", " DATA_TYPE = Data type to process (eg: atemp_csi)", " INPUT_FILE = Input CSV file to process (eg: ~/obs/tmp/erie-cmt/csvdata/atemp_csi.csv)", " OUTPUT_FILE = Output CSV file (eg: /data/obs/now/erie-cmt/csv_format/atemp_csi.csv)", " append = Option to only append new data", " allflags = Option to also output all flags:", " (known, range, climate, spike, flat line, rate of change)", "", sep="\n" ) quit( save = "no" ) } Station = args[ 1 ] DataType = args[ 2 ] InputFile = args[ 3 ] OutputFile = args[ 4 ] AppendFlag = FALSE AllFlags = FALSE i = 5 while ( i <= 6 ) { if ( ! is.na( args[ i ] ) ) { if ( args[ i ] == "append" ) { AppendFlag = TRUE } if ( args[ i ] == "allflags" ) { AllFlags = TRUE } } i = i + 1 } cat( "[Station = ", Station, "]\n", sep="" ) cat( "[DataType = ", DataType, "]\n", sep="" ) cat( "[InputFile = ", InputFile, "]\n", sep="" ) cat( "[OutputFile = ", OutputFile, "]\n", sep="" ) cat( "[AppendFlag = ", AppendFlag, "]\n", sep="" ) cat( "[AllFlags = ", AllFlags, "]\n", sep="" ) cat( "\n" ) # *********************************** # * Extract Qartod Table Parameters * # *********************************** ClimateAirTempValue = 1 # "AirTemp" in table ClimateSurfaceTempValue = 2 # "SrfTemp" in table ClimateBottomTempValue = 3 # "BtmTemp" in table ClimateProfileTempValue = 4 # "ProTemp" in table ClimateParBuoyValue = 5 # "ParBuoy" in table ClimateSolarBuoyValue = 6 # "SolBuoy" in table # Locate data type in qartod table search = paste( "^", DataType, ",", sep="" ) index = grep( search, QartodTable ) if ( length( index ) == 0 ) { # If data type not found, then just copy file and quit cat( "[DataType ", DataType, " not found in QartodTable.R file]\n", sep="" ) cat( "\n", sep="" ) cat( "[Copying ", InputFile, " to ", OutputFile, "]\n", sep="" ) cmd = paste( "cp -v", InputFile, OutputFile, sep=" " ) system( cmd, intern=FALSE, ignore.stdout=FALSE, ignore.stderr=FALSE, wait=TRUE ) cat( "\n", sep="" ) cat( "[Converting format to dos]\n", sep="" ) cmd = paste( "sed 's/\r//g' -i ", OutputFile, sep="" ) system( cmd, intern=FALSE, ignore.stdout=FALSE, ignore.stderr=FALSE, wait=TRUE ) cmd = paste( "todos -v ", OutputFile, sep="" ) system( cmd, intern=FALSE, ignore.stdout=FALSE, ignore.stderr=FALSE, wait=TRUE ) cat( "\n", sep="" ) cat( "[QartodCalc.R done at ", as.character( Sys.time() ), "]\n", sep="" ) q() } # Extract parameters for row in qartod table qtparamrow = unlist( strsplit( QartodTable[ index ], "," ) ) qtparams = suppressWarnings( as.numeric( qtparamrow[ ndxParamStart : ndxParamEnd ] ) ) # Get dim value ndim = qtparams[ ndxDim ] cat( "[ndim = ", ndim, "]\n", sep="" ) # Convert climate test type to numeric value climatetesttype = gsub( " ", "", qtparamrow[ ndxClimateTest + ndxParamStart - 1 ], fixed = TRUE ) if ( climatetesttype == "AirTemp" ) { qtparams[ ndxClimateTest ] = ClimateAirTempValue } else if ( climatetesttype == "SrfTemp" ) { qtparams[ ndxClimateTest ] = ClimateSurfaceTempValue } else if ( climatetesttype == "BtmTemp" ) { qtparams[ ndxClimateTest ] = ClimateBottomTempValue } else if ( climatetesttype == "ProTemp" ) { qtparams[ ndxClimateTest ] = ClimateProfileTempValue } else if ( climatetesttype == "ParBuoy" ) { qtparams[ ndxClimateTest ] = ClimateParBuoyValue } else if ( climatetesttype == "SolBuoy" ) { qtparams[ ndxClimateTest ] = ClimateSolarBuoyValue } else { qtparams[ ndxClimateTest ] = 0 } # Load climate tables if needed if ( qtparams[ ndxClimateTest ] == ClimateAirTempValue ) { airtemptable = read.csv( ClimateAirTempTable, sep=',', header=TRUE, stringsAsFactors = FALSE, blank.lines.skip = FALSE ) } if ( qtparams[ ndxClimateTest ] == ClimateSurfaceTempValue ) { surfacetemptable = read.csv( ClimateSurfaceTempTable, sep=',', header=TRUE, stringsAsFactors = FALSE, blank.lines.skip = FALSE ) } if ( qtparams[ ndxClimateTest ] == ClimateBottomTempValue ) { bottomtemptable = read.csv( ClimateBottomTempTable, sep=',', header=TRUE, stringsAsFactors = FALSE, blank.lines.skip = FALSE ) } if ( qtparams[ ndxClimateTest ] == ClimateProfileTempValue ) { profiletemptable = read.csv( ClimateProfileTempTable, sep=',', header=TRUE, stringsAsFactors = FALSE, blank.lines.skip = FALSE ) } if ( qtparams[ ndxClimateTest ] == ClimateParBuoyValue ) { partable = read.csv( ClimateParBuoyTable, sep=',', header=TRUE, stringsAsFactors = FALSE, blank.lines.skip = FALSE ) } if ( qtparams[ ndxClimateTest ] == ClimateSolarBuoyValue ) { partable = read.csv( ClimateSolarBuoyTable, sep=',', header=TRUE, stringsAsFactors = FALSE, blank.lines.skip = FALSE ) } # ************************ # * Read Input Data File * # ************************ # First read all lines into string vector alldata = readLines( InputFile ) nalldata = length( alldata ) # Find number of header lines by finding first line starting with characters "20" (year) i = 1 while ( i <= nalldata ) { if ( substr( alldata[ i ], 1, 2 ) == "20" ) { break } i = i + 1 } nheaderdata = i - 1 # Separate header and csv text data headerdata = alldata[ 1 : nheaderdata ] csvtextdata = alldata[ nheaderdata + 1 : nalldata ] ncsvdata = nalldata - nheaderdata cat( "[nheaderdata = ", nheaderdata, "]\n", sep="" ) cat( "[ncsvdata = ", ncsvdata, "]\n", sep="" ) # ***************************** # * Append - Read Output File * # ***************************** if (AppendFlag ) { # If file doesn't exist, then cancel append mode if ( ! file.exists( OutputFile ) ) { cat( "[NO OUTPUT FILE TO APPEND--Creating a new one]\n" ) AppendFlag = FALSE } } if (AppendFlag ) { # First read all lines into string vector olddata = readLines( OutputFile ) nolddata = length( olddata ) cat( "[nolddata = ", nolddata, "]\n", sep="" ) # Find number of header lines by finding first line starting with characters "20" (year) i = 1 while ( i <= nolddata ) { if ( substr( olddata[ i ], 1, 2 ) == "20" ) { break } i = i + 1 } noldheaderdata = i - 1 cat( "[noldheaderdata = ", noldheaderdata, "]\n", sep="" ) # Calculate size of old csv data noldcsvdata = nolddata - noldheaderdata cat( "[noldcsvdata = ", noldcsvdata, "]\n", sep="" ) # Get last time stamp in file as seconds since epoch lastdatarow = unlist( strsplit( olddata[ nolddata ], "," ) ) cat( "[lasttime = ", lastdatarow[ 1 ], "\n", sep="" ) lastsecs = tryCatch( { suppressWarnings( as.integer( as.POSIXct( lastdatarow[ 1 ], units="secs" ) ) ) }, warning = function( war ) { options( show.error.messages = FALSE, showWarnCalls = FALSE, showErrorCalls = FALSE ) cat( "Error: INVALID TIME VALUE\n" ) stop() }, error = function( err ) { options( show.error.messages = FALSE, showWarnCalls = FALSE, showErrorCalls = FALSE ) cat( "Error: INVALID TIME VALUE\n" ) stop() } ) } # **************************** # * Extract Data Time Stamps * # **************************** # Convert time stamps to seconds since epoch allsecs = tryCatch( { suppressWarnings( as.integer( as.POSIXct( csvtextdata, units="secs" ) ) ) }, warning = function( war ) { options( show.error.messages = FALSE, showWarnCalls = FALSE, showErrorCalls = FALSE ) cat( "Error: INVALID TIME VALUE\n" ) stop() }, error = function( err ) { options( show.error.messages = FALSE, showWarnCalls = FALSE, showErrorCalls = FALSE ) cat( "Error: INVALID TIME VALUE\n" ) stop() } ) # *************************************** # * Append - Find First New Data Record * # *************************************** # Use all data if not appending icsvdata = 1 # Index to start of new data in csvdata[] ndata = ncsvdata # Number of data values to process (new data values + up to 4 values back) idata = 1 # Index to start of values to process in csvdata[] if (AppendFlag ) { # Find first record in csvdata that is later then the last record in olddata # This will be the index in csvdata array where new data starts i = ncsvdata while ( i >= 1 ) { if ( allsecs[ i ] <= lastsecs ) { icsvdata = i + 1 break } i = i - 1 } if ( icsvdata > ncsvdata ) { cat( "[NO DATA TO APPEND]\n\n" ) cat( "[QartodCalc.R done at ", as.character( Sys.time() ), "]\n", sep="" ) q() } # Go back up to 4 previous data rows for data processing idata = icsvdata - 4 if ( idata < 1 ) { idata = 1 } ndata = ncsvdata - idata + 1 } cat( "[icsvdata = ", icsvdata, "]\n", sep="" ) cat( "[idata = ", idata, "]\n", sep="" ) cat( "[ndata = ", ndata, "]\n", sep="" ) # ********************************* # * Read Input Data in CSV Format * # ********************************* # Read in data lines as data array n = nheaderdata + ncsvdata - ndata # Skip header and data not being processed csvdata = read.csv( InputFile, sep=',', skip=n, header=FALSE, stringsAsFactors=FALSE, blank.lines.skip=FALSE ) ndatacols = ncol( csvdata ) - 1 if ( ndatacols < 1 ) { stop( "INPUT FILE LESS THAN 2 COLUMNS" ) } if ( ndim == 1 ) { if ( ndatacols != 1 ) { stop( "INPUT FILE NOT 2 COLUMNS" ) } } if ( nrow( csvdata ) != ndata ) { stop( "PROGRAM ERROR: nrow( csvdata ) != ndata" ) } # Convert data strings to numeric and confirm that all values are numeric if ( ndatacols == 1 ) { data = suppressWarnings( as.numeric( csvdata[ , 2 ] ) ) if ( any( is.na( data ) ) ) { stop( "INVALID DATA VALUES" ) } } else { data = csvdata[ 1 : ndata , 2 : ( ndatacols + 1 ) ] i = 1 while ( i <= ndatacols ) { if ( any( is.na( suppressWarnings( as.numeric( data[ , i ] ) ) ) ) ) { stop( "INVALID DATA VALUES" ) } i = i + 1 } } # Generate array of correct dimensions with all PassValue if ( ndatacols == 1 ) { passflags = rep( PassValue, ndata ) } else { passflags = data # Use data array as template to set up two dimensional array in R, then fill with PassValue q = 1 while ( q <= ndatacols ) { passflags[ , q ] = PassValue q = q + 1 } } cat( "[ndatacols = ", ndatacols, "]\n", sep="" ) cat( "\n" ) # *********************** # * Process Time Values * # *********************** # Get subset of allsecs[] to secs[] to match csvdata[] secs = allsecs[ ( ncsvdata - ndata + 1 ) : ncsvdata ] # Convert time stamps to julian day (only if doing any climate tests) # (Input format errors already caught above) if ( qtparams[ ndxClimateTest ] ) { julday = as.integer( strftime( csvdata[ , 1 ], format="%j" ) ) # Get hour for Par or Solar test if ( ( qtparams[ ndxClimateTest ] == ClimateParBuoyValue ) || ( qtparams[ ndxClimateTest ] == ClimateSolarBuoyValue ) ) { hour = as.integer( strftime( csvdata[ , 1 ], format="%H" ) ) } } # Determine time between samples in seconds: # Calculate each time between samples (set first interval to 0) deltasecs = c( 0, secs[ 2 : ndata ] - secs[ 1 : ( ndata - 1 ) ] ) # Calculate typical delta time from last 200 data values using histogram function table() typdeltasecs = as.integer( names( sort( table( tail( deltasecs, 200 ) ), decreasing=TRUE )[1] ) ) cat( "[typdeltasecs = ", typdeltasecs, "]\n\n", sep="" ) # Identify all records with gaps > 10% of typdeltasecs gaps = rep( FALSE, ndata ) gaps[ deltasecs > as.integer( 1.1 * typdeltasecs ) ] = TRUE gaps[ deltasecs < as.integer( 0.9 * typdeltasecs ) ] = TRUE # ************************ # * Check Known Failures * # ************************ cat( "[Check Known Failures]\n", sep="" ) knownflags = passflags n = length( QartodKnownFailsTable ) i = 0 flag = 1 while ( i < n ) { i = i + 1 # First check for matching sensor and station failparamrow = unlist( strsplit( QartodKnownFailsTable[ i ], "," ) ) if ( ( failparamrow[ ndxSensor ] == DataType ) && ( failparamrow[ ndxStation ] == Station ) ) { if ( flag ) { cat( "[Matching station and sensor]\n", sep="" ) flag = 0 } # Get fail start and end times in seconds since epoch hour = as.integer( as.integer( failparamrow[ ndxFailTime ] ) / 100 ) min = as.integer( as.integer( failparamrow[ ndxFailTime ] ) %% 100 ) failstartsecs = as.integer( as.POSIXct( paste( failparamrow[ ndxFailYear ], "/", failparamrow[ ndxFailMonth ], "/", failparamrow[ ndxFailDay ], " ", hour, ":", min, sep="" ), units="secs" ) ) hour = as.integer( as.integer( failparamrow[ ndxEndTime ] ) / 100 ) min = as.integer( as.integer( failparamrow[ ndxEndTime ] ) %% 100 ) failendsecs = as.integer( as.POSIXct( paste( failparamrow[ ndxFailYear ], "/", failparamrow[ ndxEndMonth ], "/", failparamrow[ ndxEndDay ], " ", hour, ":", min, sep="" ), units="secs" ) ) # Then flag all data within known bad dates if ( ndatacols == 1 ) { knownflags[ ( secs >= failstartsecs ) & ( secs <= failendsecs ) ] = as.integer( failparamrow[ ndxFlagValue ] ) } else { knownflags[ ( secs >= failstartsecs ) & ( secs <= failendsecs ), 1 ] = as.integer( failparamrow[ ndxFlagValue ] ) q = 2 while ( q <= ndatacols ) { knownflags[ , q ] = knownflags[ , 1 ] q = q + 1 } } } } # ******************** # * Gross Range Test * # ******************** rangeflags = passflags if ( qtparams[ ndxRangeTest ] ) { cat( "[Gross Range Test]\n", sep="" ) if ( ndatacols == 1 ) { rangeflags[ data < qtparams[ ndxUserMin ] ] = SuspectValue rangeflags[ data > qtparams[ ndxUserMax ] ] = SuspectValue rangeflags[ data < qtparams[ ndxSensorMin ] ] = FailValue rangeflags[ data > qtparams[ ndxSensorMax ] ] = FailValue } else { q = 1 while ( q <= ndatacols ) { rangeflags[ data[ , q ] < qtparams[ ndxUserMin ], q ] = SuspectValue rangeflags[ data[ , q ] > qtparams[ ndxUserMax ], q ] = SuspectValue rangeflags[ data[ , q ] < qtparams[ ndxSensorMin ], q ] = FailValue rangeflags[ data[ , q ] > qtparams[ ndxSensorMax ], q ] = FailValue q = q + 1 } } } # **************** # * Climate Test * # **************** climateflags = passflags # Air Temperature Test if ( qtparams[ ndxClimateTest ] == ClimateAirTempValue ) { cat( "[Climate Test: AirTemp]\n", sep="" ) if ( ndatacols == 1 ) { climateflags[ data < airtemptable$MinTempC[ julday ] ] = SuspectValue climateflags[ data > airtemptable$MaxTempC[ julday ] ] = SuspectValue } else { q = 1 while ( q <= ndatacols ) { climateflags[ data[ , q ] < airtemptable$MinTempC[ julday ], q ] = SuspectValue climateflags[ data[ , q ] > airtemptable$MaxTempC[ julday ], q ] = SuspectValue q = q + 1 } } } # Surface Temperature Test if ( qtparams[ ndxClimateTest ] == ClimateSurfaceTempValue ) { cat( "[Climate Test: SrfTemp]\n", sep="" ) if ( ndatacols == 1 ) { climateflags[ data < surfacetemptable$MinTempC[ julday ] ] = SuspectValue climateflags[ data > surfacetemptable$MaxTempC[ julday ] ] = SuspectValue } else { q = 1 while ( q <= ndatacols ) { climateflags[ data[ , q ] < surfacetemptable$MinTempC[ julday ], q ] = SuspectValue climateflags[ data[ , q ] > surfacetemptable$MaxTempC[ julday ], q ] = SuspectValue q = q + 1 } } } # Bottom Temperature Test if ( qtparams[ ndxClimateTest ] == ClimateBottomTempValue ) { cat( "[Climate Test: BtmTemp]\n", sep="" ) if ( ndatacols == 1 ) { climateflags[ data < bottomtemptable$MinTempC[ julday ] ] = SuspectValue climateflags[ data > bottomtemptable$MaxTempC[ julday ] ] = SuspectValue } else { q = 1 while ( q <= ndatacols ) { climateflags[ data[ , q ] < bottomtemptable$MinTempC[ julday ], q ] = SuspectValue climateflags[ data[ , q ] > bottomtemptable$MaxTempC[ julday ], q ] = SuspectValue q = q + 1 } } } # Profile Temperature Test if ( qtparams[ ndxClimateTest ] == ClimateProfileTempValue ) { cat( "[Climate Test: ProTemp]\n", sep="" ) if ( ndatacols == 1 ) { climateflags[ data < profiletemptable$MinTempC[ julday ] ] = SuspectValue climateflags[ data > profiletemptable$MaxTempC[ julday ] ] = SuspectValue } else { q = 1 while ( q <= ndatacols ) { climateflags[ data[ , q ] < profiletemptable$MinTempC[ julday ], q ] = SuspectValue climateflags[ data[ , q ] > profiletemptable$MaxTempC[ julday ], q ] = SuspectValue q = q + 1 } } } # Par or Solar Buoy Test if ( ( qtparams[ ndxClimateTest ] == ClimateParBuoyValue ) || ( qtparams[ ndxClimateTest ] == ClimateSolarBuoyValue ) ) { if ( qtparams[ ndxClimateTest ] == ClimateParBuoyValue ) { cat( "[Climate Test: ParBuoy]\n", sep="" ) } else { cat( "[Climate Test: SolBuoy]\n", sep="" ) } julday8 = bitwShiftR( julday, 3 ) + 1 # Divide by 8 by shifting right 3 bits, then add 1 i = 0 # Loop i = 1 to ndata while ( i < ndata ) { i = i + 1 if ( ndatacols == 1 ) { if ( data[ i ] > partable[ julday8[ i ], hour[ i ] + 2 ] ) { climateflags[ i ] = SuspectValue } } else { q = 1 while ( q <= ndatacols ) { if ( data[ i, q ] > partable[ julday8[ i ], hour[ i ] + 2 ] ) { climateflags[ i, q ] = SuspectValue } q = q + 1 } } } } # ************** # * Spike Test * # ************** # This is actually a step test spikeflags = passflags suspectvalue = qtparams[ ndxThrshldSuspect ] failvalue = qtparams[ ndxThrshldFail ] if ( qtparams[ ndxSpikeTest ] ) { cat( "[Spike Test]\n", sep="" ) q = 1 while ( q <= ndatacols ) { i = 1 # Loop i = 1 to ndata - 1 and j = 2 # j = 2 to ndata while ( j < ndata ) { i = i + 1 j = j + 1 if ( ndatacols == 1 ) { dataj = data[ j ] } else { dataj = data[ j, q ] } if ( gaps[ j ] ) { # If following a gap in data, then check against next two values if available # and if the two values delta are less than the suspect test value k = j + 1 # next value index l = k + 1 # value after next value index if ( l > ndata ) { next } if ( ndatacols == 1 ) { dataj = data[ j ] datak = data[ k ] datal = data[ l ] } else { dataj = data[ j, q ] datak = data[ k, q ] datal = data[ l, q ] } if ( abs( datal - datak ) >= suspectvalue ) { next } delta = abs( datak - dataj ) if ( delta > failvalue ) { cat( "[spike after fail @ ", csvtextdata[ j + idata - 1 ], ", delta = ", delta, " ]\n", sep="" ) if ( ndatacols == 1 ) { spikeflags[ j ] = FailValue } else { spikeflags[ j, q ] = FailValue } i = i + 1 # Skip next value j = j + 1 next } if ( delta > suspectvalue ) { cat( "[spike after suspect @ ", csvtextdata[ j + idata - 1 ], ", delta = ", delta, " ]\n", sep="" ) if ( ndatacols == 1 ) { spikeflags[ j ] = SuspectValue } else { spikeflags[ j, q ] = SuspectValue } i = i + 1 # Skip next value j = j + 1 next } next } if ( ndatacols == 1 ) { if ( rangeflags[ i ] != PassValue ) { next } if ( climateflags[ i ] != PassValue ) { next } delta = abs( dataj - data[ i ] ) } else { if ( rangeflags[ i, q ] != PassValue ) { next } if ( climateflags[ i, q ] != PassValue ) { next } delta = abs( dataj - data[ i, q ] ) } if ( delta > failvalue ) { cat( "[spike fail @ ", csvtextdata[ j + idata - 1 ], ", delta = ", delta, " ]\n", sep="" ) if ( ndatacols == 1 ) { spikeflags[ j ] = FailValue } else { spikeflags[ j, q ] = FailValue } i = i + 1 # Skip next value j = j + 1 next } if ( delta > suspectvalue ) { cat( "[spike suspect @ ", csvtextdata[ j + idata - 1 ], ", delta = ", delta, " ]\n", sep="" ) if ( ndatacols == 1 ) { spikeflags[ j ] = SuspectValue } else { spikeflags[ j, q ] = SuspectValue } i = i + 1 # Skip next value j = j + 1 next } } q = q + 1 } } # ****************** # * Flat Line Test * # ****************** flatflags = passflags testflag = qtparams[ ndxFlatLineTest ] if ( testflag ) { cat( "[Flat Line Test]\n", sep="" ) rptcntsuspect = qtparams[ ndxRptCntSuspect ] rptcntfail = qtparams[ ndxRptCntFail ] eps = qtparams[ ndxEps ] thr = qtparams[ ndxThr ] q = 1 while ( q <= ndatacols ) { samecount = 0 if ( ndatacols == 1 ) { refvalue = data[ 1 ] } else { refvalue = data[ 1, q ] } i = 1 # Loop i = 2 to ndata while ( i < ndata ) { i = i + 1 if ( gaps[ i ] ) { # Reset test if data gap if ( ndatacols == 1 ) { refvalue = data[ i ] } else { refvalue = data[ i, q ] } samecount = 0 next } if ( testflag == 2 ) { # Reset test if data below threshold & test flag = 2 if ( data[ i ] <= thr ) { if ( ndatacols == 1 ) { refvalue = data[ i ] } else { refvalue = data[ i, q ] } samecount = 0 next } } if ( ndatacols == 1 ) { value = data[ i ] } else { value = data[ i, q ] } if ( testflag == 3 ) { # Reset test if data above threshold & test flag = 3 if ( data[ i ] >= thr ) { refvalue = value samecount = 0 next } } if ( abs( value - refvalue ) < eps ) { samecount = samecount + 1 if ( samecount >= rptcntfail ) { if ( ndatacols == 1 ) { flatflags[ (i - rptcntfail + 1 ) : i ] = FailValue cat( "[flat line fail @ ", csvtextdata[ i + idata - 1 ], ", value = ", value, ", count = ", samecount, " ]\n", sep="" ) } else { flatflags[ (i - rptcntfail + 1 ) : i, q ] = FailValue cat( "[flat line fail @ ", csvtextdata[ i + idata - 1 ], ", col = ", q, ", value = ", value, ", count = ", samecount, " ]\n", sep="" ) } } else if ( samecount >= rptcntsuspect ) { if ( ndatacols == 1 ) { flatflags[ (i - rptcntsuspect + 1 ) : i ] = SuspectValue cat( "[flat line suspect @ ", csvtextdata[ i + idata - 1 ], ", value = ", value, ", count = ", samecount, " ]\n", sep="" ) } else { flatflags[ (i - rptcntsuspect + 1 ) : i, q ] = SuspectValue cat( "[flat line suspect @ ", csvtextdata[ i + idata - 1 ], ", col = ", q, ", value = ", value, ", count = ", samecount, " ]\n", sep="" ) } } } else { samecount = 0 refvalue = value } } q = q + 1 } } # *********************** # * Rate of Change Test * # *********************** rateflags = passflags if ( qtparams[ ndxRateOfChangeTest ] ) { cat( "[Rate of Change Test]\n", sep="" ) q = 1 while ( q <= ndatacols ) { if ( ndatacols == 1 ) { proc = ( pmax( knownflags, rangeflags, spikeflags, flatflags ) == 1 ) & ! gaps } else { proc = ( pmax( knownflags[ , q ], rangeflags[ , q ], spikeflags[ , q ], flatflags[ , q ] ) == 1 ) & ! gaps } # Get delta between data samples (index 1 = change between index 2 value and index 1 value) if ( ndatacols == 1 ) { deltadata = data[ 2 : ndata ] - data[ 1 : ( ndata - 1 ) ] } else { deltadata = data[ 2 : ndata, q ] - data[ 1 : ( ndata - 1 ), q ] } # Comparison value is DeltaPerMinSuspect * time between samples compvalue = qtparams[ ndxDeltaPerMinSuspect ] * typdeltasecs / 60 # Flag as suspect if the last 4 delta values exceed the value (positive or negative) i = 4 # Loop i = 5 to ndata while ( i < ndata - 1 ) { i = i + 1 if ( ! min( proc[ ( i - 4 ) : ( i - 1 ) ] ) ) { # Only process if no suspect/fail/gap flags next } if ( ( ( deltadata[ i - 4 ] > compvalue ) && ( deltadata[ i - 3 ] > compvalue ) && ( deltadata[ i - 2 ] > compvalue ) && ( deltadata[ i - 1 ] > compvalue ) ) || ( ( deltadata[ i - 4 ] < -compvalue ) && ( deltadata[ i - 3 ] < -compvalue ) && ( deltadata[ i - 2 ] < -compvalue ) && ( deltadata[ i - 1 ] < -compvalue ) ) ) { if ( ndatacols == 1 ) { rateflags[ ( i - 3 ) : i ] = SuspectValue } else { rateflags[ ( i - 3 ) : i, q ] = SuspectValue } d1 = deltadata[ i - 1 ] * 60 / typdeltasecs d2 = deltadata[ i - 2 ] * 60 / typdeltasecs d3 = deltadata[ i - 3 ] * 60 / typdeltasecs d4 = deltadata[ i - 4 ] * 60 / typdeltasecs cat( "[rate suspect @ ", csvtextdata[ i + idata - 1 ], ", deltadata = ", d4, d3, d2, d1, "]\n", sep=" " ) } } q = q + 1 } } # ***************** # * Gradient Test * # ***************** gradientflags = passflags if ( qtparams[ ndxGradientTest ] && ( ndatacols >= 3 ) ) { cat( "[Gradient Test]\n", sep="" ) gdelta = qtparams[ ndxGdelta ] # Number of columns to process (half for two column data) nproccols = ndatacols if ( ndim == 3 ) { nproccols = floor( ndatacols / 2 ) } i = 1 while ( i <= ndata ) { # Algorithm: # x1 x2 x3 = data column values # d1 d2 = delta between values # | | # \-----d3-----/ = delta between x3 and x1 # # gdelta = minimum delta value that is suspect # # if d1 < gdelta and d2 > gdelta then x3 is suspect # if d1 > gdelta and d2 < gdelta then x1 is suspect # if d1 > gdelta and d2 > gdelta then x2 is suspect and also do test below: # if d3 > gdelta then: # x3 is suspect and # if x1 is first column then x1 is suspect maxdelta = 0 # Columns to process: all for ndim=2, even/odd for ndim=3 c = 0 if ( ndim == 2 ) { cend = 0 jinc = 1 } else { cend = 1 jinc = 2 } while ( c <= cend ) { # Process each column j = 0 jend = nproccols - 3 while ( j <= jend ) { # j = 1 to nproccols - 2 (since we process 3 cols together) j = j + 1 if ( ndim == 2 ) { jj1 = j # Index into data and flags column j } else { jj1 = ( j * 2 ) - 1 + c # Index into data and flags column j } jj2 = jj1 + jinc # Index into data and flags column j + 1 jj3 = jj2 + jinc # Index into data and flags column j + 2 delta1 = abs( data[ i, jj2 ] - data[ i, jj1 ] ) delta2 = abs( data[ i, jj3 ] - data[ i, jj2 ] ) if ( ( delta2 >= gdelta ) && ( delta1 < gdelta ) ) { gradientflags[ i, jj3 ] = SuspectValue if ( delta2 > maxdelta ) { maxdelta = delta2 } next } if ( delta1 >= gdelta ) { if ( delta2 < gdelta ) { gradientflags[ i, jj1 ] = SuspectValue if ( delta1 > maxdelta ) { maxdelta = delta1 } next } if ( delta2 >= gdelta ) { gradientflags[ i, jj2 ] = SuspectValue delta3 = abs( data[ i, jj3 ] - data[ i, jj1 ] ) if ( delta3 >= gdelta ) { gradientflags[ i, jj3 ] = SuspectValue if ( j == 1 ) { gradientflags[ i, jj1 ] = SuspectValue } if ( delta3 > maxdelta ) { maxdelta = delta3 } } next } } } c = c + 1 } if ( maxdelta > 0 ) { cat( "[gradient suspect @ ", csvtextdata[ i + idata - 1 ], ", delta = ", maxdelta, "]\n", sep=" " ) } i = i + 1 } } # *********************** # * Combine Flag Values * # *********************** cat( "[Summary Flags]\n", sep="" ) if ( ndatacols == 1 ) { summaryflags = as.integer( pmax( knownflags, rangeflags, climateflags, spikeflags, flatflags, rateflags, gradientflags ) ) } else { summaryflags = passflags q = 1 while ( q <= ndatacols ) { summaryflags[ , q ] = as.integer( pmax( knownflags[ , q ], rangeflags[ , q ], climateflags[ , q ], spikeflags[ , q ], flatflags[ , q ], rateflags[ , q ], gradientflags[ , q ] ) ) q = q + 1 } } if ( AllFlags ) { cat( "[All Flags]\n", sep="" ) if ( ndatacols == 1 ) { allflags = as.integer( ( 1000000 * knownflags ) + ( 100000 * rangeflags ) + ( 10000 * climateflags ) + ( 1000 * spikeflags ) + ( 100 * rateflags ) + ( 10 * flatflags ) + gradientflags ) } else { allflags = passflags q = 1 while ( q <= ndatacols ) { allflags[ , q ] = as.integer( ( 1000000 * knownflags[ , q ] ) + ( 100000 * rangeflags[ , q ] ) + ( 10000 * climateflags[ , q ] ) + ( 1000 * spikeflags[ , q ] ) + ( 100 * rateflags[ , q ] ) + ( 10 * flatflags[ , q ] ) + gradientflags[ , q ] ) q = q + 1 } } } # ******************** # * Output Data File * # ******************** cat( "\n[Output Data File]\n", sep="" ) sink( OutputFile, append = AppendFlag ) # Output metadata and header comments unless appending if ( ! AppendFlag ) { i = 1 while ( i <= nheaderdata ) { cat( headerdata[ i ], "\r\n", sep="" ) i = i + 1 } } # Output data i = icsvdata - idata + 1 # Loop i indexes into data[] j = icsvdata # Loop j indexes into csvtextdata[] n = 0 while ( i <= ndata ) { if ( ! AllFlags ) { # Output data with summary flag cat( sprintf( "%s", csvtextdata[ j ] ), sep="" ) if ( ndatacols == 1 ) { cat( ", ", summaryflags[ i ], "\r\n", sep="" ) } else { q = 1 while ( q <= ndatacols ) { cat( ", ", summaryflags[ i, q ], sep="" ) q = q + 1 } cat( "\r\n", sep="" ) } } else { # Output data with all flags (allflags option) cat( csvtextdata[ j ], sep="" ) if ( ndatacols == 1 ) { cat( ", ", summaryflags[ i ], ", ", allflags[ i ], "\r\n", sep="" ) } else { q = 1 while ( q <= ndatacols ) { cat( ", ", summaryflags[ i, q ], sep="" ) q = q + 1 } q = 1 while ( q <= ndatacols ) { cat( ", ", allflags[ i, q ], sep="" ) q = q + 1 } cat( "\r\n", sep="" ) } } i = i + 1 j = j + 1 n = n + 1 } sink() if ( AppendFlag ) { cat( "[Appended " ) } else { cat( "[Wrote ", nheaderdata, " header lines]\n", sep="" ) cat( "[Wrote " ) } cat( n, " data lines to ", OutputFile, "]\n\n", sep="" ) # Done progdonetime = as.integer( Sys.time() ) elapsedtime = progdonetime - progstarttime cat( "[Program elapsed time = ", elapsedtime, " secs]\n\n", sep="" ) cat( "[QartodCalc.R done at ", as.character( Sys.time() ), "]\n", sep="" )