/**
 * scraper.ts
 *
 * exports - fetchTweetsByTimestamp, fetchPage, extractLinks, scrapeBerachainDocs
 *
 * fetchTweetsByTimestamp - exposed as external APIs + used in the cron jobs; the results are converted to embeddings and stored in the database.
 *  - fetches recent tweets by timestamp and tags / authors
 *  - handles - (we can take a list from De) basically we want folks who tweet reliable info and know what they're talking about wrt Berachain
 *  - searchTerms - e.g. "berachain launch", "token wen?" (basically we'll have context passed in, in case a user asks something and the vectorDB does not have a contextual enough answer)
 *  - Start / End date - we can have a cron job that runs every 2 hours and fetches tweets from the last 2 hours (although the first time it gets the best tweets, with the most engagement, from the past ~24 hours)
 *  - Engagement (minimumReplies, minimumRetweets) - to filter out spam
 *     returns Tweet[]
 *
 */
/**
 * A titled piece of text nested inside a larger section.
 *
 * Used as the element type of `subsections` in {@link DocSection} and in the
 * result of {@link siteScraper}.
 */
export interface SubSection {
    /** Heading of the subsection. */
    title: string;
    /** Body text of the subsection. NOTE(review): plain text vs. HTML/markdown is not specified here — confirm against the implementation. */
    content: string;
}
/**
 * A documentation section: a topic with an overview and a list of subsections.
 *
 * NOTE(review): this shape is close to, but not the same as, the section
 * objects returned by {@link siteScraper}, which use `source_url` instead of
 * `url`. Confirm which one callers are expected to use.
 */
export interface DocSection {
    /** Name of the topic the section covers. */
    topic: string;
    /** URL associated with the section — presumably the page it was taken from; verify against the implementation. */
    url: string;
    /** Summary text for the section. */
    overview: string;
    /** Subsections belonging to this section. */
    subsections: SubSection[];
}
/**
 * Asynchronously produces a structured summary object for the given base URL.
 *
 * Only the declaration is visible here; the implementation lives elsewhere.
 * Presumably it crawls the site rooted at `baseUrl` and extracts its sections
 * — confirm against the implementation.
 *
 * @param baseUrl - URL string the scraper starts from.
 * @returns A promise resolving to an object with a `title`, a `last_updated`
 *   string, a `total_sections` count and the `sections` themselves.
 */
export declare function siteScraper(baseUrl: string): Promise<{
    /** Title of the scraped result. */
    title: string;
    /** Last-updated marker as a string. NOTE(review): format (ISO 8601? locale string?) is not specified here — confirm. */
    last_updated: string;
    /** Number of sections. Presumably equal to `sections.length` — verify against the implementation. */
    total_sections: number;
    /**
     * Extracted sections. The element shape is declared inline and uses
     * `source_url`, so it is not assignable to `DocSection` (which uses `url`).
     */
    sections: {
        topic: string;
        source_url: string;
        overview: string;
        subsections: SubSection[];
    }[];
}>;
