Parent

Nokogiri::HTML::Document

Public Class Methods

new(*args) click to toggle source

(Not documented)

# File lib/nokogiri/html/document.rb, line 5
      # Set up a new document by forwarding every argument, unchanged, to the
      # parent class's initializer.
      def initialize(*args)
        super
      end
new click to toggle source

Create a new document

/*
 * Create a new document.
 *
 * All arguments are optional. The first is handed to htmlNewDoc as the URI
 * and the second as the external ID; an argument that is missing, nil or
 * false is passed as NULL. After the document has been wrapped, its
 * +initialize+ method is invoked with the complete original argument list.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE rest, uri, external_id, rb_doc;
  const xmlChar *c_uri         = NULL;
  const xmlChar *c_external_id = NULL;
  htmlDocPtr doc;

  /* "0*": no required arguments, everything is collected into +rest+. */
  rb_scan_args(argc, argv, "0*", &rest);
  uri         = rb_ary_entry(rest, 0);
  external_id = rb_ary_entry(rest, 1);

  if (RTEST(uri))
    c_uri = (const xmlChar *)StringValuePtr(uri);
  if (RTEST(external_id))
    c_external_id = (const xmlChar *)StringValuePtr(external_id);

  doc    = htmlNewDoc(c_uri, c_external_id);
  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_funcall2(rb_doc, rb_intern("initialize"), argc, argv);

  return rb_doc;
}
parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) click to toggle source

Parse HTML. string_or_io may be a String, or any object that responds to read and close such as an IO, or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.

# File lib/nokogiri/html/document.rb, line 50
        # Parse HTML.
        #
        # string_or_io:: a String, or an object that responds to +read+ (it is
        #                then handed to read_io instead of read_memory).
        # url::          location of the document; when nil and the input
        #                responds to both +read+ and +path+, the path is used.
        # encoding::     encoding name; when nil and the input responds to
        #                +encoding+, that encoding's name is used.
        # options::      an Integer bit mask (wrapped in
        #                Nokogiri::XML::ParseOptions) or an object that already
        #                responds to +to_i+.
        #
        # If a block is given, the options object is yielded to it before
        # parsing so the caller can adjust it. Non-IO input whose +length+ is
        # zero yields a new, empty document.
        def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
          # Test against Integer: the Fixnum constant no longer exists on
          # current Rubies, and Integer also matches on the old ones.
          options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
          # Give the options to the user
          yield options if block_given?

          if string_or_io.respond_to?(:encoding)
            encoding ||= string_or_io.encoding.name
          end

          if string_or_io.respond_to?(:read)
            url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
            return self.read_io(string_or_io, url, encoding, options.to_i)
          end

          return self.new if string_or_io.length == 0
          self.read_memory(string_or_io, url, encoding, options.to_i)
        end
read_io(io, url, encoding, options) click to toggle source

Read the HTML document from io with given url, encoding, and options. See Nokogiri::HTML.parse

/*
 * Read the HTML document from +io+ with the given +url+, +encoding+, and
 * +options+.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to libxml2.
 * +options+ is converted with NUM2INT. The document is read through
 * htmlReadIO using io_read_callback / io_close_callback with +io+ as their
 * context.
 *
 * While parsing, a structured error handler is installed with a fresh Ruby
 * array as its context; that array is assigned to the resulting document via
 * +errors=+.
 *
 * Raises the exception built by Nokogiri_wrap_xml_syntax_error when parsing
 * produced no document and libxml2 reports a last error, or RuntimeError
 * ("Could not parse document") when there is no such error.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url    = (url == Qnil) ? NULL : StringValuePtr(url);
  const char * c_enc    = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlInitParser();
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadIO(
      io_read_callback,
      io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      NUM2INT(options)
  );
  /* Uninstall the handler before any Ruby exception can be raised below. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* No document exists on this path, so there is nothing to free. */
    xmlErrorPtr error = xmlGetLastError();

    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));

    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_funcall(document, rb_intern("errors="), 1, error_list);
  return document;
}
read_memory(string, url, encoding, options) click to toggle source

Read the HTML document contained in string with given url, encoding, and options. See Nokogiri::HTML.parse

/*
 * Read the HTML document contained in +string+ with the given +url+,
 * +encoding+, and +options+.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to libxml2.
 * +options+ is converted with NUM2INT.
 *
 * While parsing, a structured error handler is installed with a fresh Ruby
 * array as its context; that array is assigned to the resulting document via
 * +errors=+.
 *
 * Raises the exception built by Nokogiri_wrap_xml_syntax_error when parsing
 * produced no document and libxml2 reports a last error, or RuntimeError
 * ("Could not parse document") when there is no such error.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url    = (url == Qnil) ? NULL : StringValuePtr(url);
  const char * c_enc    = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
  /* htmlReadMemory takes an int size, so the length is narrowed here.
   * NOTE(review): a string longer than INT_MAX bytes would be truncated. */
  int len               = (int)RSTRING_LEN(string);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlInitParser();
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
  /* Uninstall the handler before any Ruby exception can be raised below. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* No document exists on this path, so there is nothing to free. */
    xmlErrorPtr error = xmlGetLastError();

    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));

    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_funcall(document, rb_intern("errors="), 1, error_list);
  return document;
}

Public Instance Methods

fragment(tags) click to toggle source

Create a Nokogiri::XML::DocumentFragment from tags

# File lib/nokogiri/html/document.rb, line 37
      # Build a DocumentFragment from the markup in +tags+, passing this
      # document as the fragment's first constructor argument.
      def fragment(tags)
        DocumentFragment.new(self, tags)
      end
meta_encoding click to toggle source

Get the meta tag encoding for this document.

/*
 * Get the meta tag encoding for this document.
 *
 * Returns the encoding reported by htmlGetMetaEncoding as a Ruby string, or
 * nil when libxml2 reports none (NULL).
 */
static VALUE meta_encoding(VALUE self)
{
  htmlDocPtr doc;
  const xmlChar * meta;

  Data_Get_Struct(self, xmlDoc, doc);

  meta = htmlGetMetaEncoding(doc);
  /* NULL means no encoding was found; do not hand NULL to the string
   * constructor (NOTE(review): NOKOGIRI_STR_NEW2 is defined elsewhere and is
   * presumed not to accept a NULL pointer - confirm). */
  if(meta == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(meta, doc->encoding);
}
meta_encoding= click to toggle source

Set the meta tag encoding for this document.

/*
 * Set the meta tag encoding for this document.
 *
 * +encoding+ is converted to a C string and passed to htmlSetMetaEncoding,
 * whose return value is ignored. Returns +encoding+.
 */
static VALUE set_meta_encoding(VALUE self, VALUE encoding)
{
  htmlDocPtr doc;
  const xmlChar * c_encoding;

  Data_Get_Struct(self, xmlDoc, doc);

  c_encoding = (const xmlChar *)StringValuePtr(encoding);
  htmlSetMetaEncoding(doc, c_encoding);

  return encoding;
}
serialize(*args) click to toggle source

Serialize this Document with encoding using options

# File lib/nokogiri/html/document.rb, line 11
      # Serialize this Document.
      #
      # Preferred form - a single options Hash, passed to +super+ as given:
      #
      #   serialize(:encoding => enc, :save_with => opts)
      #
      # Deprecated form - serialize(encoding, save_opts). A deprecation
      # warning naming the receiver's class and the call site is written to
      # $stderr, and the positional arguments are converted to the Hash form.
      # When no save options are given, the default is
      # FORMAT | AS_HTML | NO_DECLARATION | NO_EMPTY_TAGS.
      def serialize(*args)
        if args.first && !args.first.is_a?(Hash)
          # Real interpolation, so the warning shows the actual class name and
          # backtrace rather than the literal placeholders.
          $stderr.puts([
            "#{self.class}#serialize(encoding, save_opts) is deprecated and will be removed in",
            "Nokogiri version 1.4.0 *or* after June 1 2009.",
            "You called serialize from here:",
            "",
            caller.join("\n"),
            "",
            "Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)"
          ].join("\n"))
        end

        options = args.first.is_a?(Hash) ? args.shift : {
          :encoding   => args[0],
          :save_with  => args[1] || XML::Node::SaveOptions::FORMAT |
            XML::Node::SaveOptions::AS_HTML |
            XML::Node::SaveOptions::NO_DECLARATION |
            XML::Node::SaveOptions::NO_EMPTY_TAGS
        }
        super(options)
      end
type click to toggle source

The type for this document

/*
 * The type for this document.
 *
 * Returns the +type+ field of the underlying document struct as a Ruby
 * integer.
 */
static VALUE type(VALUE self)
{
  htmlDocPtr doc;
  int doc_type;

  Data_Get_Struct(self, xmlDoc, doc);
  doc_type = (int)doc->type;

  return INT2NUM(doc_type);
}

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.