Tuesday, March 6, 2012

Mssql Regex

столкнулся с проблемой когда в скуле необходимо воспользоваться регулярными выражениями, после не долгого шуршания по гуглу нашел ресурс 1. разрешаем скулу пользоваться Ole Automation Procedures 2. Создаем в скуле необходимые процедуры
sp_configure 'show advanced options', 1;
GO
RECONFIGURE;
GO
sp_configure 'Ole Automation Procedures', 1;
GO
RECONFIGURE;
GO
/*
The OLE Regex Match function
-----------------------------

Let's start off with something simple, a function for testing a string against a regular expression */

IF OBJECT_ID (N'dbo.RegexMatch') IS NOT NULL
   DROP FUNCTION dbo.RegexMatch
GO

CREATE FUNCTION dbo.RegexMatch
    (
      @pattern VARCHAR(2000),
      @matchstring VARCHAR(MAX)--Varchar(8000) got SQL Server 2000
    )
RETURNS INT
/* The RegexMatch returns True or False, indicating if the regular expression matches (part of) the string. (It returns null if there is an error).
When using this for validating user input, you'll normally want to check if the entire string matches the regular expression. To do so, put a caret at the start of the regex, and a dollar at the end, to anchor the regex at the start and end of the subject string.
*/ 
AS BEGIN
    DECLARE @objRegexExp INT,
        @objErrorObject INT,
        @strErrorMessage VARCHAR(255),
        @hr INT,
        @match BIT

    SELECT  @strErrorMessage = 'creating a regex object'
    EXEC @hr= sp_OACreate 'VBScript.RegExp', @objRegexExp OUT
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'Pattern', @pattern
        --Specifying a case-insensitive match 
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'IgnoreCase', 1
        --Doing a Test' 
    IF @hr = 0 
        EXEC @hr= sp_OAMethod @objRegexExp, 'Test', @match OUT, @matchstring
    IF @hr <> 0 
        BEGIN
            RETURN NULL
        END
    EXEC sp_OADestroy @objRegexExp
    RETURN @match
   END
GO

/*

With this function, the passing back of errors is rudimentary. If an OLE error occurs, then a null is passed back.

There are two other basic Regex functions available. With them, you can use regular expressions in all sorts of places in TSQL without having to get to direct grips with the rather awkward OLE interface.

The OLE Regex Replace function
-----------------------------

*/


IF OBJECT_ID(N'dbo.RegexReplace') IS NOT NULL 
    DROP FUNCTION dbo.RegexReplace
GO
CREATE FUNCTION dbo.RegexReplace
    (
      @pattern VARCHAR(255),
      @replacement VARCHAR(255),
      @Subject VARCHAR(MAX),
      @global BIT = 1,
     @Multiline bit =1
    )
RETURNS VARCHAR(MAX)

/*The RegexReplace function takes three string parameters. The pattern (the regular expression) the replacement expression, and the subject string to do the manipulation to.

The replacement expression is one that can cause difficulties. You can specify an empty string '' as the @replacement text. This will cause the Replace method to return the subject string with all regex matches deleted from it (see "strip all HTML elements out of a string" below). 
To re-insert the regex match as part of the replacement, include $& in the replacement text. (see "find a #comment and add a TSQL --" below)
If the regexp contains capturing parentheses, you can use backreferences in the replacement text. $1 in the replacement text inserts the text matched by the first capturing group, $2 the second, etc. up to $9. (e.g. see import delimited text into a database below) To include a literal dollar sign in the replacements, put two consecutive dollar signs in the string you pass to the Replace method.*/
AS BEGIN
    DECLARE @objRegexExp INT,
        @objErrorObject INT,
        @strErrorMessage VARCHAR(255),
        @Substituted VARCHAR(8000),
        @hr INT,
        @Replace BIT

    SELECT  @strErrorMessage = 'creating a regex object'
    EXEC @hr= sp_OACreate 'VBScript.RegExp', @objRegexExp OUT
    IF @hr = 0 
        SELECT  @strErrorMessage = 'Setting the Regex pattern',
                @objErrorObject = @objRegexExp
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'Pattern', @pattern
    IF @hr = 0 /*By default, the regular expression is case sensitive. Set the IgnoreCase property to True to make it case insensitive.*/
        SELECT  @strErrorMessage = 'Specifying the type of match' 
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'IgnoreCase', 1
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'MultiLine', @Multiline
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'Global', @global
    IF @hr = 0 
        SELECT  @strErrorMessage = 'Doing a Replacement' 
    IF @hr = 0 
        EXEC @hr= sp_OAMethod @objRegexExp, 'Replace', @Substituted OUT,
            @subject, @Replacement
     /*If the RegExp.Global property is False (the default), Replace will return the @subject string with the first regex match (if any) substituted with the replacement text. If RegExp.Global is true, the @Subject string will be returned with all matches replaced.*/   
    IF @hr <> 0 
        BEGIN
            DECLARE @Source VARCHAR(255),
                @Description VARCHAR(255),
                @Helpfile VARCHAR(255),
                @HelpID INT
   
            EXECUTE sp_OAGetErrorInfo @objErrorObject, @source OUTPUT,
                @Description OUTPUT, @Helpfile OUTPUT, @HelpID OUTPUT
            SELECT  @strErrorMessage = 'Error whilst '
                    + COALESCE(@strErrorMessage, 'doing something') + ', '
                    + COALESCE(@Description, '')
            RETURN @strErrorMessage
        END
    EXEC sp_OADestroy @objRegexExp
    RETURN @Substituted
   END
GO

/*
The OLE Regex Find (Execute) function
-----------------------------

This is the most powerful function for doing complex finding and replacing of text. As it passes back detailed records of the hits, including the location and the backreferences, it allows for complex manipulations.

This is written as a table function. The Regex Routine actually passes back a collection for each 'hit'. In the relational world, you'd normally represent this in two tables, so we've returned a left outer join of the two logical tables so as to pass back all the information. This seems to cater for all the uses we can think of. We also append an error column, which should be blank!

*/

IF OBJECT_ID(N'dbo.RegexFind') IS NOT NULL 
    DROP FUNCTION dbo.RegexFind
GO
CREATE FUNCTION RegexFind(
    @pattern VARCHAR(255),
    @matchstring VARCHAR(MAX),
    @global BIT = 1,
   @Multiline bit =1)
RETURNS
    @result TABLE
        (
        Match_ID INT,
          FirstIndex INT ,
          length INT ,
          Value VARCHAR(2000),
          Submatch_ID INT,
          SubmatchValue VARCHAR(2000),
         Error VARCHAR(255)
        )


AS -- columns returned by the function
   BEGIN
    DECLARE @objRegexExp INT,
        @objErrorObject INT,
        @objMatch INT,
        @objSubMatches INT,
        @strErrorMessage VARCHAR(255),
       @error VARCHAR(255),
        @Substituted VARCHAR(8000),
        @hr INT,
        @matchcount INT,
        @SubmatchCount INT,
        @ii INT,
        @jj INT,
        @FirstIndex INT,
        @length INT,
        @Value VARCHAR(2000),
        @SubmatchValue VARCHAR(2000),
        @objSubmatchValue INT,
        @command VARCHAR(8000),
        @Match_ID INT
        
    DECLARE @match TABLE
        (
          Match_ID INT IDENTITY(1, 1)
                       NOT NULL,
          FirstIndex INT NOT NULL,
          length INT NOT NULL,
          Value VARCHAR(2000)
        )    
    DECLARE @Submatch TABLE
        (
          Submatch_ID INT IDENTITY(1, 1),
          match_ID INT NOT NULL,
          SubmatchNo INT NOT NULL,
          SubmatchValue VARCHAR(2000)
        )
       


    SELECT  @strErrorMessage = 'creating a regex object',@error=''
    EXEC @hr= sp_OACreate 'VBScript.RegExp', @objRegexExp OUT
    IF @hr = 0 
        SELECT  @strErrorMessage = 'Setting the Regex pattern',
                @objErrorObject = @objRegexExp
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'Pattern', @pattern
    IF @hr = 0 
        SELECT  @strErrorMessage = 'Specifying a case-insensitive match' 
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'IgnoreCase', 1
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'MultiLine', @Multiline
    IF @hr = 0 
        EXEC @hr= sp_OASetProperty @objRegexExp, 'Global', @global
    IF @hr = 0 
        SELECT  @strErrorMessage = 'Doing a match' 
    IF @hr = 0 
        EXEC @hr= sp_OAMethod @objRegexExp, 'execute', @objMatch OUT,
            @matchstring
    IF @hr = 0 
        SELECT  @strErrorMessage = 'Getting the number of matches'     
    IF @hr = 0 
        EXEC @hr= sp_OAGetProperty @objmatch, 'count', @matchcount OUT
    SELECT  @ii = 0 
    WHILE @hr = 0
        AND @ii < @Matchcount
        BEGIN
/*The Match object has four read-only properties. 
The FirstIndex property indicates the number of characters in the string to the left of the match. 
The Length property of the Match object indicates the number of characters in the match. 
The Value property returns the text that was matched.*/
            SELECT  @strErrorMessage = 'Getting the FirstIndex property',
                    @command = 'item(' + CAST(@ii AS VARCHAR) + ').FirstIndex'    
            IF @hr = 0 
                EXEC @hr= sp_OAGetProperty @objmatch, @command,
                    @Firstindex OUT
            IF @hr = 0 
                SELECT  @strErrorMessage = 'Getting the length property',
                        @command = 'item(' + CAST(@ii AS VARCHAR) + ').Length'    
            IF @hr = 0 
                EXEC @hr= sp_OAGetProperty @objmatch, @command, @Length OUT
            IF @hr = 0 
                SELECT  @strErrorMessage = 'Getting the value property',
                        @command = 'item(' + CAST(@ii AS VARCHAR) + ').Value'    
            IF @hr = 0 
                EXEC @hr= sp_OAGetProperty @objmatch, @command, @Value OUT
            INSERT  INTO @match
                    (
                      Firstindex,
                      [Length],
                      [Value]
                    )
                    SELECT  @firstindex + 1,
                            @Length,
                            @Value
            SELECT  @Match_ID = @@Identity         
/*The SubMatches property of the Match object is a collection of strings. It will only hold values if your regular expression has capturing groups. The collection will hold one string for each capturing group. The Count property (returned as SubmatchCount) indicates the number of string in the collection. The Item property takes an index parameter, and returns the text matched by the capturing group. 
*/
            IF @hr = 0 
                SELECT  @strErrorMessage = 'Getting the SubMatches collection',
                        @command = 'item(' + CAST(@ii AS VARCHAR)
                        + ').SubMatches'    
            IF @hr = 0 
                SELECT  @strErrorMessage = 'Getting the number of submatches'     
            IF @hr = 0 
                EXEC @hr= sp_OAGetProperty @objSubmatches, 'count',
                    @submatchCount OUT
            SELECT  @jj = 0 
            WHILE @hr = 0
                AND @jj < @submatchCount
                BEGIN
                    IF @hr = 0 
                        SELECT  @strErrorMessage = 'Getting the submatch value property',
                                @command = 'item(' + CAST(@jj AS VARCHAR)
                                + ')' ,@submatchValue=NULL   
                    IF @hr = 0 
                        EXEC @hr= sp_OAGetProperty @objSubmatches, @command,
                            @SubmatchValue OUT
                    INSERT  INTO @Submatch
                            (
                              Match_ID,
                              SubmatchNo,
                              SubmatchValue
                            )
                            SELECT  @Match_ID,
                                    @jj+1,
                                    @SubmatchValue
                    SELECT  @jj = @jj + 1
                END   
                EXEC @hr= sp_OAGetProperty @objmatch, @command,
                    @objSubmatches OUT     
            SELECT  @ii = @ii + 1
        END
    IF @hr <> 0 
        BEGIN
            DECLARE @Source VARCHAR(255),
                @Description VARCHAR(255),
                @Helpfile VARCHAR(255),
                @HelpID INT
   
            EXECUTE sp_OAGetErrorInfo @objErrorObject, @source OUTPUT,
                @Description OUTPUT, @Helpfile OUTPUT, @HelpID OUTPUT
            SELECT  @Error = 'Error whilst '
                    + COALESCE(@strErrorMessage, 'doing something') + ', '
                    + COALESCE(@Description, '')
        END
    EXEC sp_OADestroy @objRegexExp
     EXEC sp_OADestroy        @objMatch
     EXEC sp_OADestroy        @objSubMatches

INSERT INTO @result
          (Match_ID,
          FirstIndex,
          [length],
          [Value],
          Submatch_ID,
          SubmatchValue,
         error)


    SELECT  m.[Match_ID],
           [FirstIndex],
           [length],
           [Value],[SubmatchNo],
           [SubmatchValue],@error
  FROM    @match m
    LEFT OUTER JOIN   @submatch s
    ON m.match_ID=s.match_ID    
IF @@rowcount=0 AND LEN(@error)>0
INSERT INTO @result(error) SELECT @error
RETURN 
END
GO

No comments:

Post a Comment