BenfordXcli

BenfordXcli was developed for the BenfordBench project at benfordbench.org as a way to check for fraud in large data pools using Benford’s Law of Scalar Invariance. The software will require command line flags in specification of the program input. The program relies also on Linux systems and is currently only available to adapted Linux systems. Source code below:

    'This file is part of bxc/benford.

    'bxc/benford aka Benford Bench is free software: you can redistribute
    'it and/or modify it under the terms of the GNU General Public License
    'as published by the Free Software Foundation, either version 3 of the
    'License, or (at your option) any later version.

    'bxc/benford is distributed in the hope that it will be useful,
    'but WITHOUT ANY WARRANTY; without even the implied warranty of
    'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    'GNU General Public License for more details.

    'You should have received a copy of the GNU General Public License
    'along with bxc/benford.  If not, see <https://www.gnu.org/licenses/>.
    
    'bxc/benford is written by Jason S. Page as part of the Benford Bench
    'project, benfordbench.org in operation since 2016. Benford Bench
    'was created to crowd source efforts for fraud identification and reporting
    'in big data. 10,000 digits are needed to get an idea if data fits the
    'Benford curve.
    
    'This program uses OS specific operations and will only work in
    'Unix/Linux. There are no plans to make this available for DOS/Windows
    'however may work and compile with GNU DOS command line tools in OS/2.
    '
    'The Benford Bench Project as of this writing consist of the following
    'volunteers:
    '
    'Jason Page
    'Morris Chukhman
    'Padraig O'Hara
    'Kevin Perez
    'Michael Fiedler
    '
    'source: codebas.org/pagetelegram/BenfordBench


dim n(255)															' Set array to handle data lines up to 255 chars
dim toke as integer 
express$=command$(-1)
' -f = load1$, -d = a12$ [all or 1], -l = prog$, -c = column$
print express$
toke=0
tat=0
dc=0
do
 i=0
 select case mid$(express$,dc,3)
  case " -h"
   print "bxc -f [data file] -d [all] or [1] -l [length of sample pools (ie 10000)] -c [column number] - {terminal}":print "Use 0 for columns if only one column of data in input file":system

  case "-f "   ' load file
    tat=tat+1
    do
    
     tif$=tif$+tit$
     i=i+1
     tit$=mid$(express$,dc+2+i,1)
     tits$=mid$(express$,dc+i+2,2)
      
    loop until tits$=" -" or i > len(express$)

 print " -f ";tif$

  i=0

  case " -d"    														' all or 1st digit
   tat=tat+1

 	do
    
     if i>0 then tid$=tid$+tit$
    
     i=i+1
     tit$=mid$(express$,dc+2+i,1)
    
    loop until tit$="-" or i > len(express$)
    
   print " -d"; tid$  

  i=0  

  case " -l"    														' total count per sample
	tat=tat+1
     do
    
      if i>0 then til$=til$+tit$
      i=i+1
      tit$=mid$(express$,dc+i+2,1)
      
    loop until tit$="-" or i > len(express$)

   print " -l"; til$  

  i=0  

  case " -c"    														' column number with comma deliminated
	tat=tat+1
	do
    
     if i>0 then tic$=tic$+tit$
     i=i+1
     tit$=mid$(express$,dc+i+2,1)

    loop until tit$="-" or i > len(express$)

  i=0   

  print " -c ";tic$  

end select
dc=dc+1

loop until dc=len(express$)

if tat=4 then                                  							' if not enough arguments
 print "Total flags=";tat
 print "Arguments ";tif$;",";tid$;",";til$;",";tic$
 print "Press any key to continue to prompts, or use -h as a flag in command for command line help."
 'sleep 3
 load1$=ltrim$(rtrim$(tif$)):a12$=ltrim$(rtrim$(tid$)):prog$=ltrim$(rtrim$(til$)): col$=ltrim$(rtrim$(tic$))
 print load1$, a12$,prog$,col$

  if col$="0" then col$="1" 

   if left$(tif$,3)="ftp" or left$(tif$,3)="htt" then 					' check for web link; if true then download the data file
    shell "wget -c "+tif$ 				   								' get data file
    shell "ls -w1 -t > file.nam" 										' get the latest downloaded file to read from the file list newest first
     open "file.nam" for input as #11
      if not (eof(11)) then input #11,filenam$
      if not (eof(11)) then input #11,filenam$
     print "Downloaded file>";filenam$ 
    close #11
   tif$=filenam$
   print tif$
  ' 3
  
  ' -f = load1$, -d = a12$ [all or 1], -l = prog$, -c = col$
   load1$=ltrim$(rtrim$(tif$)):a12$=ltrim$(rtrim$(tid$)):prog$=ltrim$(rtrim$(til$)): col$=ltrim$(rtrim$(tic$))
   print load1$, a12$,prog$,col$
  'print "Press any key to continue"
   '4
   end if


goto 2:
   else
   print "Total flags=";tat;". Must be a total of 4 to qualify headless operation."
goto 1:
  end if
1:
 shell "ls *.dat"														' List all dat files
 input "Load>",load1$													' Ask for data file for analysis 
 input "[A]ll Digits [1]st Digit?>",a12$     							' Ask whether to count all digits or first
 input "Capture Average Every (default: 10000) Points?>",prog$ 			' Specify count / data plot segments
 input "Which Column Number (0=default: single column data only)",col$
2:
if len(prog$)=0 then prog$="10000"										' If no input specified then default vale = 10000
 shell "echo > "+load1$+"_"+a12$+"-"+prog$+"-.log"						' Empty data log file of values
 shell "echo > "+load1$+"_"+a12$+"-"+prog$+"_.log"  					' Empty data log file of percentages

if val(col$)<>0 and val(col$)<>1 then 
 shell "cut -f"+ltrim$(rtrim$((col$)))+" -d',' "+load1$+" > "+"c"+col$+"_"+load1$
 print "Command Using:"+" cut -f"+ltrim$(rtrim$((col$)))+" -d',' "+load1$+" > "+"c"+col$+"_"+load1$
 toad$="c"+col$+"_"+load1$
 load1$=toad$
end if

tc=0:c=0:c1=0:c2=0:c3=0:c4=0:c5=0:c6=0:c7=0:c8=0:c9=0					' Reset counters on start

print
print "Benford X-C Forensics Digital Analysis Tool by Jason Page 5-10-2020"
print "-------------------------------------------------------------------"
color 12,14
print "    Time,  1,  2,  3,  4,  5, 6, 7, 8, 9"
color 7,0
 open load1$ for input as #1											' Load data file for analysis

  if not (eof(1)) then line input #1,line1$								' clear header
   do
																		' record count to total record count
    if not (eof(1)) then
		input #1,ot$	 											  	' Load values per line from data file
     toke=toke+1 														' toke count lines
     shell "echo "+str$(toke)+" >> tokes.txt"							' Debug toke for line counts read from data file
    end if
 
   if c >= val(prog$) then												' If segment within range then do the analysis
    c$=str$(c)    : c6$=str$(c6)										' Perform value format to strings
    c1$=str$(c1)  : c7$=str$(c7)
    c2$=str$(c2)  : c8$=str$(c8) 
    c3$=str$(c3)  : c9$=str$(c9)
    c4$=str$(c4)
    c5$=str$(c5)
    p6$=str$(int(((c6*100)/(c*100))*100))								' Perform percent calculation to strings
    p1$=str$(int(((c1*100)/(c*100))*100))
    p7$=str$(int(((c7*100)/(c*100))*100))
    p2$=str$(int(((c2*100)/(c*100))*100))
    p8$=str$(int(((c8*100)/(c*100))*100))
    p3$=str$(int(((c3*100)/(c*100))*100))
    p9$=str$(int(((c9*100)/(c*100))*100))
    p4$=str$(int(((c4*100)/(c*100))*100))
    p5$=str$(int(((c5*100)/(c*100))*100))

    cc1$=ot$+","+str$(toke)+","+c1$+","+c2$+","+c3$+","+c4$+","+c5$+","+c6$+","+c7$+","+c8$+","+c9$  
    ' ^ Set values for storage
    cc2$=ot$+","+str$(toke)+","+p1$+","+p2$+","+p3$+","+p4$+","+p5$+","+p6$+","+p7$+","+p8$+","+p9$  
    ' ^ Set percentages for storage
    color 14,12

    print "#:";cc1$
    color 12,14
    print "%:";cc2$ 
    color 7,0
    shell "echo "+cc1$+" >> "+load1$+"_"+a12$+"-"+prog$+"-.log"			' Store values
    shell "echo "+cc2$+" >> "+load1$+"_"+a12$+"-"+prog$+"_.log"			' Store percentages
    c=0:c1=0:c2=0:c3=0:c4=0:c5=0:c6=0:c7=0:c8=0:c9=0  					' Reset counters

   end if

   if left$((ucase$(a12$)),1)="A" then position=len(ot$) 				' Count all digits
   if ucase$(a12$)="1" then position=1 									' Count only first digit
    
    for i=1 to position  												' count to position
     n(i)=val(mid$(ot$,i,1))
      select case n(i)  												' count individual digits specified in array of opened file
       case 1
        c=c+1
        c1=c1+1  
       case 2
        c=c+1
        c2=c2+1  
       case 3
        c=c+1
        c3=c3+1  
       case 4
        c=c+1
        c4=c4+1  
       case 5
        c=c+1
        c5=c5+1  
       case 6
        c=c+1
        c6=c6+1  
       case 7
        c=c+1
        c7=c7+1
       case 8
        c=c+1
        c8=c8+1
       case 9
        c=c+1
        c9=c9+1
      end select
    next i 

   eas$="":xss$=""
 
  loop until (eof(1)) 
 close #1
 
print 
print
print "Total lines:"; str$(toke)
print "Generating the ASCII Chart..."  									 'create the charts
print load1$,a12$,prog$
filenam$=load1$+"_"+a12$+"-"+prog$+"_.log"
print filenam$
 open filenam$ for input as #12
  if not(eof(12)) then line input #12,empty$
   shell "rm chart_"+filenam$+".txt"
   ccc=0
    do

     if not(eof(12)) then input #12, toss$, recd$, throw1$, throw2$, throw3$, throw4$, throw5$, throw6$, throw7$, throw8$, throw9$

     for i=1 to val(throw1$)
      as1$=as1$+"1"
     next i

     for i=1 to val(throw2$)
      as2$=as2$+"2"
     next i

     for i=1 to val(throw3$)
      as3$=as3$+"3"
     next i

     for i=1 to val(throw4$)
      as4$=as4$+"4"
     next i
 
    for i=1 to val(throw5$)
     as5$=as5$+"5"
    next i

    for i=1 to val(throw6$)
     as6$=as6$+"6"
    next i

    for i=1 to val(throw7$)
     as7$=as7$+"7"
    next i

    for i=1 to val(throw8$)
     as8$=as8$+"8"
    next i

    for i=1 to val(throw9$)
     as9$=as9$+"9"
    next i
   ccc=ccc+1
   fnam$="chart_"+filenam$
   shell "echo record number " + recd$ + " at value " + toss$ + " >> " + fnam$
   shell "echo "+as1$ + " " + " at "+throw1$+ "% >> " + fnam$
   shell "echo "+as2$ + " " + " at "+throw2$+ "% >> " + fnam$
   shell "echo "+as3$ + " " + " at "+throw3$+ "% >> " + fnam$
   shell "echo "+as4$ + " " + " at "+throw4$+ "% >> " + fnam$
   shell "echo "+as5$ + " " + " at "+throw5$+ "% >> " + fnam$
   shell "echo "+as6$ + " " + " at "+throw6$+ "% >> " + fnam$
   shell "echo "+as7$ + " " + " at "+throw7$+ "% >> " + fnam$
   shell "echo "+as8$ + " " + " at "+throw8$+ "% >> " + fnam$
   shell "echo "+as9$ + " " + " at "+throw9$+ "% >> " + fnam$
   as1$="":as2$="":as3$="":as4$="":as5$="":as6$="":as7$="":as8$="":as9$=""
   'shell "echo Chart #"+str$(ccc)+" >> "+fnam$
  loop until(eof(12))
 close #12

shell "mv "+fnam$+" "+fnam$+".txt"
print "head:"
shell "head -n10 "+fnam$+".txt"
print "tail:"
shell "tail -n10 "+fnam$+".txt"
shell "rm "+filenum$
system