class Net::HTTPResponse

Attributes

no_cache[RW]

Public Instance Methods

body_charset(str=self.raw_body) click to toggle source
# File lib/rbot/httputil.rb, line 32
# Collect the candidate character sets for a response body.
#
# Candidates are gathered, in this order, from a built-in ISO-8859-1
# fallback, the +charset+ parameter of the Content-Type header, and a
# declaration found in the body itself (an XML processing instruction, an
# HTML +meta+ http-equiv tag or an HTML5 <tt><meta charset=...></tt> tag).
#
# str:: the body text to inspect; it is only read, never modified
#
# Returns an Array of unique charset names (fallback first, most specific
# last), or +nil+ when the Content-Type is neither text nor XML/XHTML.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['ISO-8859-1'] # should be in config

  if ctype =~ /charset=["']?([^\s"']+)["']?/i
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  # str might be invalid utf-8 that would crash on the pattern match, so the
  # patterns are run against a scrubbed copy. The caller's string must keep
  # its original bytes and encoding tag: it still has to be decoded with the
  # charset found here, and it may be frozen.
  probe = str.encode('UTF-8', 'UTF-8', :invalid => :replace)
  case probe
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # the parameter name is matched case-insensitively, like the header above
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  when /<meta\s(?:[^>]*\s)?charset=["']?([^\s"'>\/;]+)/i
    # HTML5 short form: <meta charset="utf-8">
    charsets << $1
    debug "html charset #{charsets.last} added from meta charset"
  end
  return charsets.uniq
end
body_to_utf(str) click to toggle source
# File lib/rbot/httputil.rb, line 59
# Convert a response body to UTF-8.
#
# The candidate charsets reported by body_charset are tried from the last
# one to the first; the first one that decodes without raising wins.
# Invalid byte sequences are dropped during the conversion.
#
# str:: the body text; it is never modified, all work happens on copies
#
# Returns a new UTF-8 String, or +str+ itself when body_charset returns
# +nil+ or when no candidate charset could be used.
def body_to_utf(str)
  # body_charset gets its own copy so that charset sniffing can never alter
  # the bytes that are decoded below.
  charsets = self.body_charset(str.dup) or return str

  charsets.reverse_each do |charset|
    begin
      debug "try decoding using #{charset}"
      # force_encoding retags in place, so retag a copy and leave the
      # caller's string with its own encoding
      candidate = str.dup.force_encoding(charset)
      # the round trip through UTF-16 strips invalid byte sequences
      return candidate.encode('UTF-16le', :invalid => :replace, :replace => '').encode('UTF-8')
    rescue
      # unknown charset name or a conversion error: try the next candidate
      error 'failed to use encoding'
      error $!
    end
  end

  return str
end
cooked_body() click to toggle source
# File lib/rbot/httputil.rb, line 129
# Return the raw body after passing it through decompress_body and then
# through body_to_utf.
def cooked_body
  unpacked = decompress_body(raw_body)
  body_to_utf(unpacked)
end
decompress_body(str) click to toggle source
# File lib/rbot/httputil.rb, line 80
# Undo the compression named by the Content-Encoding header.
#
# str:: the raw body bytes, possibly truncated by a partial read
#
# Returns +str+ itself when there is nothing to undo (no header,
# +identity+, or a charset mistakenly sent as content encoding), otherwise
# the decompressed data. A gzip body that cannot be unpacked completely
# yields whatever could be recovered.
#
# Raises Zlib::Error when a deflate body cannot be inflated, and
# RuntimeError for any other content encoding.
def decompress_body(str)
  encoding = self['content-encoding']
  case encoding
  when nil, /\Aidentity\z/i
    # identity means "not encoded"
    return str
  when /gzip/i # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # If we can't unpack the whole stream (e.g. because we're doing a
      # partial read), collect the bytes that do decode
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      ret = String.new
      ret.force_encoding(Encoding::ASCII_8BIT)
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          ret << byte
        }
      rescue
        # best effort: keep what was decoded before the stream broke
      end
      return ret
    end
  when /\Adeflate\z/i
    debug "inflating body"
    begin
      # Many servers send a bare deflate stream for this encoding, so that
      # is tried first; -MAX_WBITS stops zlib from looking for a zlib header
      return Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(str)
    rescue Zlib::Error => raw_error
      begin
        # HTTP defines "deflate" as a zlib-wrapped stream, so retry with
        # header detection before giving up
        debug "raw inflation failed (#{raw_error}), retrying as zlib stream"
        return Zlib::Inflate.new(Zlib::MAX_WBITS).inflate(str)
      rescue Zlib::Error
        # TODO: try to recover as much as possible, like the gzip branch
        raise raw_error
      end
    end
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    old_hsh = self.to_hash
    # a missing content-type gets the same default body_charset falls back to
    ctype = self['content-type'] || 'text/html'
    self['content-type'] = "#{ctype}; charset=#{encoding.downcase}"
    warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{encoding}"
  end
end
partial_body(size=0) { |body_to_utf(decompress_body)| ... } click to toggle source

Read chunks from the body until at least size bytes have been read (the whole body when size is 0), yielding the decompressed, UTF-8 converted partial text after each chunk. Returns the partial body converted the same way.

# File lib/rbot/httputil.rb, line 135
# Read chunks from the body until at least +size+ bytes have been read,
# yielding the partial text (decompressed and converted to UTF-8) after
# each chunk.
#
# size:: number of raw bytes after which reading stops; +0+ or +nil+ reads
#        the whole body
#
# When <tt>@read</tt> is set the body is not read again: self.body is used
# as the partial and the block is called once. Otherwise no_cache is set
# to true and the body is read chunk by chunk.
#
# Returns the partial body, decompressed and converted to UTF-8.
def partial_body(size=0, &block)

  partial = String.new

  if @read
    debug "using body() as partial"
    partial = self.body
    # hand out a copy so the conversion helpers cannot alter the stored body
    yield self.body_to_utf(self.decompress_body(partial.dup)) if block_given?
  else
    debug "disabling cache"
    self.no_cache = true
    self.read_body { |chunk|
      partial << chunk
      # The accumulator must stay a plain byte buffer: if a helper retagged
      # or rewrote it, appending the next chunk could raise an encoding
      # error or corrupt a character split across two chunks.
      yield self.body_to_utf(self.decompress_body(partial.dup)) if block_given?
      # size counts raw bytes, hence bytesize rather than length
      break if size && size > 0 && partial.bytesize >= size
    }
  end

  return self.body_to_utf(self.decompress_body(partial.dup))
end
to_json() click to toggle source
# File lib/rbot/httputil.rb, line 161
# Parse the response body as JSON and return whatever JSON.parse produces.
def to_json
  payload = self.body
  JSON.parse(payload)
end
xpath(path) click to toggle source
# File lib/rbot/httputil.rb, line 156
# Parse the response body with Nokogiri's HTML parser and evaluate the
# XPath expression +path+ against the resulting document.
#
# Returns the value of the document's xpath call.
def xpath(path)
  Nokogiri::HTML.parse(self.body).xpath(path)
end