# Converts HTML markup into a plain-text approximation.
#
# The input is run through a fixed sequence of regex substitutions, so the
# order of the steps matters:
#   1. drop everything between the "<!-- start text/html -->" and
#      "<!-- end text/html -->" marker comments
#   2. replace <img> tags that carry an alt attribute with the alt text
#   3. rewrite links as "text ( url )", dropping a leading "mailto:"
#   4. render headings: h1 boxed with '*', h2 boxed with '-', any other
#      level underlined with '-'
#   5. turn list items into "* " bullets and </p> / <br> into newlines
#   6. remove all remaining tags, then pass the text through
#      HTMLEntities#decode and word_wrap (both defined outside this method)
#   7. normalise line endings, non-breaking spaces and runs of whitespace
#
# @param html [String] markup to convert; it is left unmodified and may be frozen
# @param line_length [Integer] width handed to +word_wrap+; also the maximum
#   width of the rules drawn around headings
# @param from_charset [String] not used by this method; retained so existing
#   callers keep working
# @return [String] the text rendering with surrounding whitespace stripped
def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
  # Every step below edits the string in place, so work on a private copy:
  # the caller's string must stay intact and may well be frozen.
  txt = html.dup

  # Strip the region explicitly marked as HTML-only.
  txt.gsub!(/<!-- start text\/html -->.*?<!-- end text\/html -->/m, '')

  # Images -> alt text. `[^>]` keeps the match inside a single tag, so an
  # alt-less <img> earlier on the line cannot swallow the text that follows it.
  txt.gsub!(/<img[^>]+?alt="([^"]*)"[^>]*>/i, '\1')
  txt.gsub!(/<img[^>]+?alt='([^']*)'[^>]*>/i, '\1')

  # Links -> "text ( url )". Double-quoted hrefs are handled first, then
  # single-quoted ones. `[\s\S]*?` matches any character (newlines included)
  # without the overlapping alternation `(.|\s)`, which backtracks
  # exponentially; `[^>]*?` keeps the attribute scan inside one <a> tag.
  ['"', "'"].each do |quote|
    link = /<a\s[^>]*?href=#{quote}(mailto:)?([^#{quote}]*)#{quote}[^>]*>([\s\S]*?)<\/a>/i
    txt.gsub!(link) do
      label = Regexp.last_match(3)
      href = Regexp.last_match(2)
      label.empty? ? '' : "#{label.strip} ( #{href.strip} )"
    end
  end

  # Put each closing heading tag on its own line so the greedy `(.*)` in the
  # heading pattern below (which cannot cross a newline) stops at the first one.
  txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1")
  txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do
    # Read both captures before the nested gsub! calls reset the last match.
    level = Regexp.last_match(1).to_i
    heading = Regexp.last_match(2)
    heading.gsub!(/<br[\s]*\/?>/i, "\n")
    heading.gsub!(/<\/?[^>]*>/i, '')

    # Rule width: longest stripped line of the heading, capped at line_length.
    longest = heading.each_line.map { |line| line.strip.length }.max || 0
    width = [longest, line_length].min

    rendered =
      case level
      when 1
        rule = '*' * width
        "#{rule}\n#{heading}\n#{rule}"
      when 2
        rule = '-' * width
        "#{rule}\n#{heading}\n#{rule}"
      else
        "#{heading}\n#{'-' * width}"
      end

    "\n\n#{rendered}\n\n"
  end

  # Collapse whitespace between adjacent spans to a single space.
  txt.gsub!(/(<\/span>)[\s]+(<span)/mi, '\1 \2')

  # Lists: opening <li> becomes a bullet, closing </li> a line break.
  txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
  txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

  # Paragraph ends and line breaks.
  txt.gsub!(/<\/p>/i, "\n\n")
  txt.gsub!(/<br[\/ ]*>/i, "\n")

  # Remove whatever markup is left.
  txt.gsub!(/<\/?[^>]*>/, '')

  txt = HTMLEntities.new.decode(txt)
  txt = word_wrap(txt, line_length)

  # Whitespace clean-up.
  txt.gsub!(/\r\n?/, "\n")
  txt.gsub!(/\302\240+/, " ") # \302\240 is the UTF-8 byte pair of U+00A0 (no-break space)
  txt.gsub!(/\n[ \t]+/, "\n")
  txt.gsub!(/[ \t]+\n/, "\n")
  txt.gsub!(/[\n]{3,}/, "\n\n")
  txt.gsub!(/ {2,}/, " ")

  # Re-join "( http… )" groups whose padding ended up as a newline.
  txt.gsub!(/\([ \n](http[^)]+)[\n ]\)/, '( \1 )')

  txt.strip
end