% author: M.M.Haji
% email : mehdi.haji@gmail.com
%
function y = remove_nontext5(x)
% REMOVE_NONTEXT5  Remove non-text components from an image.
%
%   y = remove_nontext5(x)
%
%   x: input image (a 3-D array is converted with rgb2gray first)
%   y: text mask, cropped to the row/column size of x
%
%   Each 8x8 block of the smoothed, zero-padded image is classified as
%   text / non-text with a simple naive Bayes classifier over 18 nominal
%   (discretized) DCT features ("dct-18"). The block-level mask is then
%   smoothed by the external "rbsmooth" program and by morphological
%   operations, and finally scaled back up by a factor of 8.
%
%   Requirements visible in this code:
%     * text_probs (loaded with LOAD) must provide the table P, where
%       P(i,j,k), i = 1..18, j = 1..5, k = 1..2, means P(Ai = Lj | Vk) with
%       L1='S2', L2='S1', L3='CE', L4='B1', L5='B2' and V1='Yes', V2='No'.
%     * the command "rbsmooth" must be runnable through DOS().
%     * the scratch file tmp.bmp is written in the current folder.
%
%   example:
%
%       x = imread('image031.jpg');
%       y = remove_nontext5(x);
%       z = rgb2gray(x);
%       z(find(y==false)) = 255;
%       imshow(z);

% Nothing to classify: return an empty mask instead of failing later when
% an empty array is handed to imwrite.
if isempty(x)
    y = false(size(x,1), size(x,2));
    return;
end

% Load into a struct so that the contents of the MAT-file cannot silently
% overwrite local variables (e.g. x) in this workspace.
model = load('text_probs');
if ~isstruct(model) || ~isfield(model, 'P')
    error('remove_nontext5:missingModel', ...
          'text_probs does not provide the probability table P.');
end
P = model.P;

% Positions of the 18 selected coefficients in the row-major (transposed)
% linearization of the 8x8 DCT block.
dct18_coefs = [4 5 6 12 13 14 20 21 22 44 45 46 52 53 54 60 61 62];

% Discretization thresholds: row k holds the four ascending upper bounds
% that split feature k into the five nominal bins 1..5.
bin_edges = [ ...
    -15.8  -0.7   0.8  16.1;   % f01
    -13.1  -0.4   0.3  11.3;   % f02
     -9.5  -0.3   0.4  11.4;   % f03
    -11.5  -0.5   0.4  11.3;   % f04
    -10.0  -0.3   0.2   9.4;   % f05
     -6.3  -0.3   0.2   6.6;   % f06
    -10.6  -0.4   0.3   8.5;   % f07
     -7.3  -0.2   0.2   6.2;   % f08
     -5.2  -0.2   0.2   4.8;   % f09
     -4.6  -0.2   0.2   4.3;   % f10
     -3.3  -0.1   0.2   3.7;   % f11
     -3.4  -0.2   0.2   2.9;   % f12
     -3.4  -0.1   0.1   3.3;   % f13
     -2.0  -0.1   0.1   2.0;   % f14
     -2.0  -0.1   0.1   2.0;   % f15
     -2.0  -0.2   0.2   3.0;   % f16
     -2.3  -0.1   0.1   2.4;   % f17
     -2.2  -0.1   0.2   2.3];  % f18
num_feats = numel(dct18_coefs);

if ndims(x) == 3
    x = rgb2gray(x);
end

x = imfilter(x, fspecial('average',3)); % noise removal <- VERY IMPORTANT

% Zero-pad at the bottom/right so both dimensions are multiples of 8.
[nr, nc] = size(x);
NR = ceil(nr/8) * 8;
NC = ceil(nc/8) * 8;
x = padarray(x, [NR-nr NC-nc], 0, 'post');

% One mask element per 8x8 block.
y = false(NR/8, NC/8);

for br = 1:NR/8
    r0 = (br-1)*8 + 1;
    for bc = 1:NC/8
        c0 = (bc-1)*8 + 1;

        % 8x8 DCT, read out row by row, reduced to the 18 selected features.
        coefs = dct2(x(r0:r0+7, c0:c0+7));
        coefs = coefs';
        coefs = coefs(:);
        feats = coefs(dct18_coefs);

        % Naive Bayes: multiply the class-conditional probabilities of the
        % bin each feature falls in (same order as features f01..f18).
        P_yes = 1;
        P_no  = 1;
        for k = 1:num_feats
            % Edges are ascending, so the number of edges that are >= the
            % value identifies the bin: 4 -> bin 1, ..., 0 -> bin 5.
            % A NaN value satisfies no comparison and lands in bin 5.
            bin = 5 - sum(feats(k) <= bin_edges(k,:));
            P_yes = P_yes * P(k,bin,1);
            P_no  = P_no  * P(k,bin,2);
        end

        y(br,bc) = P_yes > P_no;
    end
end

% apply rule-based smoothing:
% NOTE(review): tmp.bmp is created in the current folder, overwrites any
% existing file of that name and is left behind afterwards -- confirm
% whether it should be moved to tempdir and deleted.
se_row = strel('rect',[1 3]);

imwrite(y,'tmp.bmp');
run_rbsmooth('rbsmooth tmp.bmp tmp.bmp 1 10');
run_rbsmooth('rbsmooth tmp.bmp tmp.bmp 3 10');
y = imread('tmp.bmp');

y = imopen(y, se_row);

imwrite(y,'tmp.bmp');
run_rbsmooth('rbsmooth tmp.bmp tmp.bmp 2 10');
y = imread('tmp.bmp');

y = imclose(y, se_row);
y = imopen(y, se_row);
y = imdilate(y, strel('rect',[3 3]));

% Back to pixel resolution, then drop the padded rows/columns.
y = imresize(y,8);
y = y(1:nr,1:nc);


function run_rbsmooth(cmd)
% RUN_RBSMOOTH  Run one rbsmooth command line and warn if it reports failure.
%   A nonzero exit status is reported as a warning (not an error) so that
%   the rest of the pipeline still runs, as it did when the status was
%   ignored.
status = dos(cmd);
if status ~= 0
    warning('remove_nontext5:rbsmoothFailed', ...
            'Command "%s" returned status %d; the mask may not be smoothed.', ...
            cmd, status);
end