1
0

toc_data.rb 760 B

12345678910111213141516171819202122232425262728293031
  1. require 'nokogiri'
  # Builds a nested table-of-contents structure from a page's HTML.
  #
  # Every h1, h2 and h3 element found in +page_content+ becomes a Hash with:
  #   :id       - the element's id attribute as a String ("" when absent)
  #   :content  - the element's child nodes, exactly as Nokogiri returns them
  #   :title    - the children serialized to a String with every <...> run removed
  #   :level    - the heading level taken from the tag name (1, 2 or 3)
  #   :children - the entries nested beneath this heading
  #
  # h3 entries are attached to the closest preceding h1/h2, then h2 entries
  # are attached to the closest preceding h1. A sub-heading that has no
  # preceding shallower heading stays in the top-level list instead of being
  # discarded, so no heading is ever lost from the result.
  #
  # @param page_content [String] HTML fragment to scan for headings
  # @return [Array<Hash>] top-level entries in document order
  def toc_data(page_content)
    html_doc = Nokogiri::HTML::DocumentFragment.parse(page_content)

    # Flat list of headings, in document order.
    headers = html_doc.css('h1, h2, h3').map do |header|
      {
        id: header.attribute('id').to_s,
        content: header.children,
        title: header.children.to_s.gsub(/<[^>]*>/, ''),
        level: header.name[1].to_i,
        children: []
      }
    end

    # Deepest level first: each h3 is already attached to its h2 by the time
    # that h2 is itself moved underneath its h1.
    [3, 2].each do |header_level|
      parent = nil
      headers = headers.reject do |header|
        if header[:level] < header_level
          # Shallower heading: it becomes the parent for what follows.
          parent = header
          false
        elsif header[:level] == header_level && parent
          parent[:children].push(header)
          true
        else
          # Either a heading with no parent yet (kept at top level) or a
          # heading of a different level that this pass does not handle.
          false
        end
      end
    end

    headers
  end