import { extractMainContent } from '../contentExtractor';

/**
 * HTML fixture passed to `extractMainContent` in the test suite.
 *
 * The document has page chrome (`<header>`, `<nav>`, `<footer>`) around a
 * `<main>` element. Inside `<main>` there is an `<article>` holding a heading,
 * a paragraph, a `div.ads` block and a list, followed by a sibling `<aside>`.
 * The literal is passed to the function verbatim, so its whitespace and
 * Korean text are part of the test input.
 */
const htmlSample = `
<html>
  <body>
    <header>Header</header>
    <nav>Navigation</nav>
    <main>
      <article>
        <h1>제목</h1>
        <p>본문 내용입니다.</p>
        <div class="ads">광고</div>
        <ul><li>항목1</li><li>항목2</li></ul>
      </article>
      <aside>사이드바</aside>
    </main>
    <footer>Footer</footer>
  </body>
</html>
`;

// Checks that extractMainContent keeps the article text from the fixture
// and drops the text of the surrounding non-article elements.
describe('extractMainContent', () => {
  it('should extract main content and remove ads/navigation/etc', () => {
    // Text from the fixture's heading, paragraph and first list item.
    const keptSnippets = ['제목', '본문 내용입니다.', '항목1'];
    // Text from the fixture's ads div, header, nav, footer and aside.
    const droppedSnippets = ['광고', 'Header', 'Navigation', 'Footer', '사이드바'];

    const extracted = extractMainContent(htmlSample);

    for (const snippet of keptSnippets) {
      expect(extracted).toContain(snippet);
    }
    for (const snippet of droppedSnippets) {
      expect(extracted).not.toContain(snippet);
    }
  });
});
