2009-09-11

SQL Server stored procedure to return file information

An earlier version of SQL Server had the undocumented procedure "xp_getfileinfo" to return file information, but alas, it has been dropped from SQL Server 2005. On searching the 'net there are a few suggested workarounds.

One uses the "sp_OA*" procedures to access the FileSystemObject and return the requested information, others solve the task by implementing a CLR function. A third approach relies on "xp_cmdshell" and interpreting the output from a "dir" command. The last approach appeals the most to me, however, there's a problem if you want to be locale independent, the date and time formats produced by dir vary: dd/mm/yyyy, mm.dd.yyyy, hh:mm (24 hour format) hh:mmam/pm, etc. So the challenge is to interpret dir's output and return the information in a standard format. I decided on a two step process where the first step samples the output of a larger number of files to infer the current date format, with the second step applying the results to returning the info for the requested file. Here it is:
IF OBJECT_ID('dbo.p_FileInfo', 'P') IS NOT NULL DROP PROCEDURE dbo.p_FileInfo
go
CREATE PROCEDURE dbo.p_FileInfo
    (@p_FileName varchar(250),
     @p_FileDate varchar(20) = NULL OUTPUT,  -- Format YYYYMMDD_HHMM
     @p_FileSize int = NULL OUTPUT)          -- Size in bytes
AS
    SET NOCOUNT ON

    -- Check if file exists, otherwise don't bother ...
    declare @err int
    exec master.dbo.xp_fileexist @p_FileName, @err OUTPUT;
    if @err = 0 return(1);

    -- The first part of this procedure tries to determine the current date format
    -- used in "DIR" output, by analysing a sampling of the output.
    CREATE TABLE #diroutput(line varchar(100), sDate varchar(20), sTime varchar(10), 
                            FileSize int, sName varchar(100))

    insert #diroutput(line) Exec master.dbo.xp_cmdshell 
        'dir /A-D /TW /-C /4 %SYSTEMROOT%\TEMP %TEMP% | findstr /R "^[0-9]"'
    delete #diroutput where line is null

    declare @line varchar(100);
    declare @line1 varchar(100);

    set @line = (select top 1 line from #diroutput);

    declare @iDateStart smallint
    declare @iDateEnd smallint
    declare @iTimeStart smallint
    declare @iTimeEnd smallint
    declare @iSizeStart smallint
    declare @iSizeEnd smallint
    declare @iNameStart smallint

    set @iDateEnd = CHARINDEX(' ', @line);
    set @line1 = STUFF(@line, 1, @iDateEnd, REPLICATE(' ', @iDateEnd));

    set @iTimeStart = @iDateEnd + 1;
    set @iTimeEnd = CHARINDEX(' ', @line1, PATINDEX('%[0-9]%', @line1));
    set @line1 = STUFF(@line, 1, @iTimeEnd, REPLICATE(' ', @iTimeEnd));
    if LTRIM(@line1) LIKE '[AP]M %'
    begin
        set @iTimeEnd = CHARINDEX(' ', @line1, CHARINDEX('M', @line1));
        set @line1 = STUFF(@line, 1, @iTimeEnd, REPLICATE(' ', @iTimeEnd));
    end;

    set @iSizeStart = @iTimeEnd + 1;
    set @iSizeEnd = CHARINDEX(' ', @line1, PATINDEX('%[0-9]%', @line1));
    set @line1 = STUFF(@line, 1, @iSizeEnd, REPLICATE(' ', @iSizeEnd));

    set @iNameStart = @iSizeEnd + 1;

    UPDATE #diroutput SET
           sDate = left(line, @iDateEnd-1)
          ,sTime = LTRIM(substring(line, @iTimeStart, @iTimeEnd - @iTimeStart))
          ,FileSize = convert(int, LTRIM(substring(line, @iSizeStart, 
                                                   @iSizeEnd - @iSizeStart)))
          ,sName = LTRIM(substring(line, @iNameStart, LEN(line) - @iNameStart + 1));

    declare @sDateSample varchar(20)
    declare @dateSep char(1)
    set @sDateSample = (select top 1 sDate from #diroutput);
    set @dateSep = SUBSTRING(@sDateSample, PATINDEX('%[^0-9]%', @sDateSample), 1);

    declare @sTimeSample varchar(10)
    declare @timeSep char(1)
    set @sTimeSample = (select top 1 sTime from #diroutput);
    set @timeSep = SUBSTRING(@sTimeSample, PATINDEX('%[^0-9]%', @sTimeSample), 1);

    -- Three parts of a datestring
    declare @DateParts 
        TABLE (
            PartNo smallint, 
            PartStart smallint, 
            PartLen smallint, 
            PartName varchar(10));

    declare @iPartStart smallint, @iPartLen smallint

    -- First part
    set @iPartStart = 1;
    set @iPartLen = CHARINDEX(@dateSep, @sDateSample) - 1;
    INSERT @DateParts(PartNo, PartStart, PartLen) SELECT 1, @iPartStart, @iPartLen;
    -- Second part
    set @iPartStart = @iPartStart + @iPartLen + 1;
    set @iPartLen = CHARINDEX(@dateSep, @sDateSample, @iPartStart) - @iPartStart;
    INSERT @DateParts(PartNo, PartStart, PartLen) SELECT 2, @iPartStart, @iPartLen;
    -- Third part
    set @iPartStart = @iPartStart + @iPartLen + 1;
    set @iPartLen = LEN(@sDateSample) - @iPartStart + 1;
    INSERT @DateParts(PartNo, PartStart, PartLen) SELECT 3, @iPartStart, @iPartLen;

    UPDATE dp
        set PartName = case
                          when dpMax.maxval <= 12 then 'MONTH'
                          when dpMax.maxval <= 31 then 'DAY'
                          else 'YEAR'
                       end
    FROM @DateParts dp JOIN
        (select max(PartNo) As PartNo, max(substring(sDate, PartStart, PartLen)) As maxval
         from #diroutput o cross join @DateParts p group by p.PartNo) dpMax 
              on dp.PartNo = dpMax.PartNo;

    -- In the second part we retrieve the information about the file
    declare @cmd varchar(1000)
    TRUNCATE TABLE #diroutput
    set @cmd = 'dir /A-D /TW /-C /4 "' + @p_FileName + '" | findstr /R "^[0-9]"'
    insert #diroutput(line) Exec master.dbo.xp_cmdshell @cmd
    delete #diroutput where line is null

    UPDATE #diroutput SET
           sDate = left(line, @iDateEnd-1)
          ,sTime = LTRIM(substring(line, @iTimeStart, @iTimeEnd - @iTimeStart))
          ,FileSize = convert(int, LTRIM(substring(line, @iSizeStart, 
                                                   @iSizeEnd - @iSizeStart)))
          ,sName = LTRIM(substring(line, @iNameStart, LEN(line) - @iNameStart + 1));

    -- Perform AM/PM conversion to 24 hour clock (just in case)
    UPDATE #diroutput SET
        sTime = case
                    when sTime LIKE '12%a%'     then 
                        '00' + SUBSTRING(sTime, 4, 2)
                    when sTime LIKE '0%p%'      then
                        convert(varchar(2), 12 + convert(int, left(sTime, 2))) 
                        + SUBSTRING(sTime, 4, 2)
                    when sTime LIKE '1[01]%p%'  then
                        convert(varchar(2), 12 + convert(int, left(sTime, 2))) 
                        + SUBSTRING(sTime, 4, 2)
                    else left(sTime, 2) + SUBSTRING(sTime, 4, 2)
                end;

    SELECT @p_FileDate = SUBSTRING(sDate, Y.PartStart, Y.PartLen) 
                       + SUBSTRING(sDate, M.PartStart, M.PartLen) 
                       + SUBSTRING(sDate, D.PartStart, D.PartLen) 
                       + '_' + sTime,
          @p_FileSize = FileSize
        FROM #diroutput
        CROSS JOIN (select PartStart, PartLen from @DateParts where PartName = 'YEAR') as Y
        CROSS JOIN (select PartStart, PartLen from @DateParts where PartName = 'MONTH') as M
        CROSS JOIN (select PartStart, PartLen from @DateParts where PartName = 'DAY') as D;

    return(0);
go

No comments: