]*class\s*=\s*["']?(?:posted|plugin-\w+)['"]?[^>]*>/) list.each do |block| next unless block block.strip! next if has_only_tags(block) continuous /= continuous_factor if body.length > 0 # リンク除外&リンクリスト判定 notlinked = eliminate_link(block) next if notlinked.length < min_length # スコア算出 c = (notlinked.length + notlinked.scan(punctuations).length * punctuation_weight) * factor factor *= decay_factor not_body_rate = block.scan(waste_expressions).length + block.scan(/amazon[a-z0-9\.\/\-\?&]+-22/i).length / 2.0 c *= (0.72 ** not_body_rate) if not_body_rate>0 c1 = c * continuous puts "----- #{c}*#{continuous}=#{c1} #{notlinked.length} \n#{strip_tags(block)[0,100]}\n" if debug # ブロック抽出&スコア加算 if c1 > threshold body += block + "\n" score += c1 continuous = continuous_factor elsif c > threshold # continuous block end bodylist << [body, score] body = block + "\n" score = c continuous = continuous_factor end end bodylist << [body, score] body = bodylist.inject{|a,b| if a[1]>=b[1] then a else b end } [strip_tags(body[0]), title] end # Extracts title. def self.extract_title(st) if st =~ /