Wednesday, May 5, 2010

Oracle: How to replace non printable characters in a string

You can replace non printable characters in a string using regexp_replace

select regexp_replace('Test'||chr(10)||chr(13)||chr(1)||' String ', '([^[:print:]])',' ') from dual

If you want to retain the the new line characters like chr(10) and chr(13) then

Select regexp_replace('Test'||chr(10)||chr(13)||chr(1)||' String ','['||chr(1)||'-'||chr(9)||']|['||chr(11)||'-'||chr(12)||']|['||chr(14)||'-'||chr(31)||']',' ') from dual

Above sql is removing chr from 1-9, 11-12 and 14-31



Here is SQL to identify printable, non printable, control characters

select level,CHR(level),
decode(chr(level), regexp_substr(chr(level), '[[:graph:]]'), '*') is_graph, --printable character
decode(chr(level), regexp_substr(chr(level), '[[:blank:]]'), '*') is_blank,
decode(chr(level), regexp_substr(chr(level), '[[:alnum:]]'), '*') is_alnum, --Any alphanumeric character, [A-Za-z0-9]
decode(chr(level), regexp_substr(chr(level), '[[:alpha:]]'), '*') is_alpha, -- Any letter, [A-Za-z]
decode(chr(level), regexp_substr(chr(level), '[[:digit:]]'), '*') is_digit, -- Any digit, [0-9]
decode(chr(level), regexp_substr(chr(level), '[[:cntrl:]]'), '*') is_cntrl, --Any character not part of the character classes: [:upper:], [:lower:], [:alpha:], [:digit:], [:punct:], [:graph:], [:print:], [:xdigit:]
decode(chr(level), regexp_substr(chr(level), '[[:lower:]]'), '*') is_lower, -- Any lowercase letter, [a-z]
decode(chr(level), regexp_substr(chr(level), '[[:upper:]]'), '*') is_upper, --Any uppercase letter, [A-Z]
decode(chr(level), regexp_substr(chr(level), '[[:print:]]'), '*') is_print, --Any printable character
decode(chr(level), regexp_substr(chr(level), '[[:punct:]]'), '*') is_punct, --Any punctuation character: START ! ' # S % & ' ( ) * + , - . / : ; < = > ? @ [ / ] ^ _ { | } ~ END
decode(chr(level), regexp_substr(chr(level), '[[:space:]]'), '*') is_space, --A tab, new line, vertical tab, form feed, carriage return, or space
decode(chr(level), regexp_substr(chr(level), '[[:xdigit:]]'), '*') is_xdigit --Any hexadecimal digit, [0-9A-Fa-f]
from dual
where level between 0 and 255
connect by level <= 256

4 comments: