syntax = "proto3";
import "alimama.proto";
import "steepandcheap.proto";
import "mountainsteals.proto";
import "tmall.proto";
import "taobao.proto";
import "manhuadb.proto";
import "douban.proto";
import "jrj.proto";
import "telegraph.proto";
import "oabt.proto";
import "hao6v.proto";
import "publictransit.proto";
import "jd.proto";
import "techinasia.proto";
import "geoip.proto";
import "dt.proto";
import "article2.proto";
import "p6vdy.proto";
import "investing.proto";
package jarviscrawlercore;
option go_package = "github.com/zhs007/jccclient/pb";

// ParagraphType - kind of content carried by a Paragraph (see Paragraph.pt).
// NOTE(review): the per-value meanings below are inferred from the value
// names only; confirm against the producer.
enum ParagraphType {
    // PT_NULL - zero value; this is what a reader sees when the field is unset.
    PT_NULL = 0;
    // PT_TEXT - presumably a plain text paragraph.
    PT_TEXT = 1;
    // PT_IMAGE - presumably an image paragraph.
    PT_IMAGE = 2;
    // PT_HEADLINE - presumably a headline.
    PT_HEADLINE = 3;
    // PT_POSTLIST - presumably a list of posts.
    PT_POSTLIST = 4;
    // PT_COMMENT - presumably a comment.
    PT_COMMENT = 5;
}

// ImageInfo - description of a single image, optionally with its raw bytes.
// Used by ExportArticleResult (imgs, titleImage) and Article (image).
message ImageInfo {
    // hashName - name identifying the image.
    // NOTE(review): presumably derived from a hash of the content and matched
    // by Paragraph.imgHashName - confirm.
    string hashName = 1;
    // url - URL of the image.
    string url = 2;
    // width - image width (presumably in pixels - confirm).
    int32 width = 3;
    // height - image height (presumably in pixels - confirm).
    int32 height = 4;
    // data - image payload as raw bytes; encoding/format is not specified here.
    bytes data = 5;
}

// Paragraph - one paragraph of an exported article
// (see ExportArticleResult.paragraphs).
message Paragraph {
    // pt - type of this paragraph.
    ParagraphType pt = 1;
    // imgHashName - hash name of an image.
    // NOTE(review): presumably references ImageInfo.hashName and is only
    // meaningful for image paragraphs - confirm.
    string imgHashName = 2;
    // text - paragraph text.
    string text = 3;
    // imgURL - URL of an image.
    string imgURL = 4;
}

// ExportArticleResult - result of exporting a single article
// (carried in ReplyArticle.result).
message ExportArticleResult {
    // title - article title.
    string title = 1;
    // author - article author.
    string author = 2;
    // writeTime - time the article was written, as a string; the format is
    // not specified in this file.
    string writeTime = 3;
    // article - article content as a string.
    // NOTE(review): relationship to `paragraphs` (same content, different
    // representation?) is not visible here - confirm.
    string article = 4;
    // url - article URL.
    string url = 5;
    // imgs - images attached to the article.
    repeated ImageInfo imgs = 6;
    // titleImage - title image of the article.
    ImageInfo titleImage = 7;
    // tags - article tags.
    repeated string tags = 8;
    // paragraphs - the article as a list of typed paragraphs.
    repeated Paragraph paragraphs = 9;
    // summary - article summary.
    string summary = 10;
}

// Article - summary information about one article (an entry of ArticleList).
message Article {
    // title - article title.
    string title = 1;
    // author - article author.
    string author = 2;
    // writeTime - time the article was written, as a string; the format is
    // not specified in this file.
    string writeTime = 3;
    // summary - article summary.
    string summary = 4;
    // url - article URL.
    string url = 5;
    // image - image associated with the article.
    ImageInfo image = 6;
    // secondTitle - secondary title.
    string secondTitle = 7;
    // premium - premium flag.
    // NOTE(review): presumably marks paid/subscriber-only content - confirm.
    bool premium = 8;
    // lang - language of the article; the code format is not specified here.
    string lang = 9;
}

// ArticleList - a list of articles (returned inside ReplyArticles).
message ArticleList {
    // articles - the articles in this list.
    repeated Article articles = 1;
}

// ConfluenceUpdateItem - one item in the Confluence "all updates" feed.
message ConfluenceUpdateItem {
    // title - item title.
    string title = 1;
    // url - item URL.
    string url = 2;
    // summary - item summary.
    string summary = 3;
}

// ConfluenceUpdateGrouping - a group of Confluence update items that share
// one author.
message ConfluenceUpdateGrouping {
    // author - author of the grouped items.
    string author = 1;
    // items - update items in this group.
    repeated ConfluenceUpdateItem items = 2;
}

// ConfluenceAllUpdates - the Confluence "all updates" feed as a list of
// per-author groupings.
message ConfluenceAllUpdates {
    // updates - update groupings.
    repeated ConfluenceUpdateGrouping updates = 1;
}

// YCCompanies - information about a YC company.
// NOTE(review): despite the plural name, the fields describe a single
// company; renaming would break generated code, so the name is kept.
message YCCompanies {
    // name - company name.
    string name = 1;
    // url - company URL.
    string url = 2;
    // info - free-form company information.
    string info = 3;
    // batch - batch identifier (presumably the YC batch - confirm format).
    string batch = 4;
    // isapp - NOTE(review): presumably true when the company is an app;
    // the exact meaning is not visible in this file.
    bool isapp = 5;
}

// CrunchBaseEmployee - a person attached to a CrunchBase organization; used
// for the curEmployees, boardMembers and pastEmployees lists of
// CrunchBaseOrganization.
message CrunchBaseEmployee {
    // name - person's name.
    string name = 1;
    // code - identifier of the person.
    // NOTE(review): presumably the CrunchBase slug - confirm.
    string code = 2;
    // title - job title.
    string title = 3;
    // startDate - start date as a string; format not specified here.
    string startDate = 4;
    // endDate - end date as a string; format not specified here.
    string endDate = 5;
}

// CrunchBaseInvestment - an investment record from CrunchBase.
message CrunchBaseInvestment {
    // announcedDate - announcement date as a string; format not specified here.
    string announcedDate = 1;
    // organizationName - name of the organization.
    // NOTE(review): presumably the organization invested in - confirm.
    string organizationName = 2;
    // leadInvestor - lead-investor flag.
    bool leadInvestor = 3;
    // fundingRound - funding round, as a string.
    string fundingRound = 4;
    // moneyRaised - amount raised, as a string (currency/format not
    // specified here).
    string moneyRaised = 5;
}

// CrunchBaseAcquisition - an acquisition record from CrunchBase, naming both
// the acquired and the acquiring organization.
message CrunchBaseAcquisition {
    // acquiredOrganizationName - name of the acquired organization.
    string acquiredOrganizationName = 1;
    // acquiredOrganizationCode - code of the acquired organization.
    string acquiredOrganizationCode = 2;
    // announcedDate - announcement date as a string; format not specified here.
    string announcedDate = 3;
    // price - acquisition price, as a string (currency/format not specified
    // here).
    string price = 4;
    // acquiringOrganizationName - name of the acquiring organization.
    string acquiringOrganizationName = 5;
    // acquiringOrganizationCode - code of the acquiring organization.
    string acquiringOrganizationCode = 6;
}

// CrunchBaseInvestor - an investor taking part in a funding round
// (see CrunchBaseFundingRound.investors).
message CrunchBaseInvestor {
    // investorName - investor name.
    string investorName = 1;
    // leadInvestor - lead-investor flag.
    bool leadInvestor = 2;
    // Partners - partner names.
    // NOTE(review): this is the only field in the file starting with an
    // upper-case letter; it is kept as-is because renaming changes the
    // generated accessors and JSON key.
    repeated string Partners = 3;
}

// CrunchBaseFundingRound - one funding round of a CrunchBase organization
// (see CrunchBaseOrganization.fundingRounds).
message CrunchBaseFundingRound {
    // announcedDate - announcement date as a string; format not specified here.
    string announcedDate = 1;
    // transactionName - name of the transaction.
    string transactionName = 2;
    // moneyRaised - amount raised, as a string (currency/format not
    // specified here).
    string moneyRaised = 3;

    // investors - investors participating in this round.
    repeated CrunchBaseInvestor investors = 100;
}

// CrunchBaseOrganization - organization profile from CrunchBase; returned as
// ReplyCrawler.cbCompany. Field numbers are grouped: 1-10 general profile,
// 100-104 IPO data, 200 funding rounds, 301/401/501 people lists.
message CrunchBaseOrganization {
    // name - organization name.
    string name = 1;
    // code - identifier of the organization.
    // NOTE(review): presumably the CrunchBase slug - confirm.
    string code = 2;
    // categories - category names.
    repeated string categories = 3;
    // headquartersRegions - headquarters regions.
    repeated string headquartersRegions = 4;
    // foundedDate - founding date as a string; format not specified here.
    string foundedDate = 5;
    // founders - founder names.
    repeated string founders = 6;
    // operatingStatus - operating status, as a string.
    string operatingStatus = 7;
    // fundingStatus - funding status, as a string.
    string fundingStatus = 8;
    // lastFundingType - type of the last funding, as a string.
    string lastFundingType = 9;
    // legalName - legal name of the organization.
    string legalName = 10;

    // stockSymbol - stock symbol.
    string stockSymbol = 100;
    // valuationIPO - valuation at IPO, as a string.
    string valuationIPO = 101;
    // priceIPO - IPO price, as a string.
    string priceIPO = 102;
    // dateIPO - IPO date as a string; format not specified here.
    string dateIPO = 103;
    // moneyRaisedIPO - money raised at IPO, as a string.
    string moneyRaisedIPO = 104;

    // fundingRounds - funding rounds of the organization.
    repeated CrunchBaseFundingRound fundingRounds = 200;

    // curEmployees - current employees.
    repeated CrunchBaseEmployee curEmployees = 301;

    // boardMembers - board members.
    repeated CrunchBaseEmployee boardMembers = 401;

    // pastEmployees - past employees.
    repeated CrunchBaseEmployee pastEmployees = 501;
}

// RequestTranslate - request of the JarvisCrawlerService.translate RPC.
message RequestTranslate {
    // text - source text to translate.
    string text = 1;
    // platform - translation platform to use, e.g. google.
    string platform = 2;
    // srcLang - source language (code format not specified here).
    string srcLang = 3;
    // destLang - destination language (code format not specified here).
    string destLang = 4;

    // token - a valid token is required in order to get a correct reply.
    string token = 100;
}

// ReplyTranslate - reply of the JarvisCrawlerService.translate RPC.
message ReplyTranslate {
    // text - translated (destination) text.
    string text = 1;
}

// RequestArticle - request of the JarvisCrawlerService.exportArticle RPC.
message RequestArticle {
    // url - URL of the article to export.
    string url = 1;
    // attachJQuery - whether jQuery needs to be attached.
    bool attachJQuery = 2;

    // token - a valid token is required in order to get a correct reply.
    string token = 100;    
}

// ReplyArticle - one message of the exportArticle reply stream. A result
// that is too long is sent as several messages; totalLength, curStart and
// curLength describe where this message's data sits in the whole payload.
message ReplyArticle {
    // result - the exported article.
    ExportArticleResult result = 1;
    // totalLength - if the message is too long it is sent in multiple
    // messages; this is the total length.
    int32 totalLength = 2;
    // curStart - the starting point of the current data (in bytes).
    int32 curStart = 3;
    // curLength - the length of the current data (in bytes).
    int32 curLength = 4;
    // hashData - this is the hash of each paragraph.
    // NOTE(review): hash algorithm and encoding are not specified here.
    string hashData = 5;
    // totalHashData - if multiple messages return data, this is the hash
    // value of all data; only sent in the last message.
    string totalHashData = 6;
    // data - payload bytes of the current message.
    bytes data = 7;
}

// RequestArticles - request of the JarvisCrawlerService.getArticles RPC.
message RequestArticles {
    // url - deprecated.
    // NOTE(review): presumably superseded by `website` - confirm.
    string url = 1 [deprecated = true];
    // attachJQuery - whether jQuery needs to be attached; deprecated.
    bool attachJQuery = 2 [deprecated = true];
    // website - website whose articles are requested (accepted values are
    // not specified in this file).
    string website = 3;

    // token - a valid token is required in order to get a correct reply.
    string token = 100;
}

// ReplyArticles - reply of the JarvisCrawlerService.getArticles RPC.
message ReplyArticles {
    // articles - the article list.
    ArticleList articles = 1;
}

// RequestCrunchBaseCompany - request for a company's information on
// CrunchBase; sent as RequestCrawler.cbCompany.
message RequestCrunchBaseCompany {
    // search - search string used to look up the company.
    string search = 1;
}

// CrawlerType - selects which crawler a RequestCrawler is addressed to and
// which crawler produced a ReplyCrawler. Each value N has a matching oneof
// member numbered 100 + N in RequestCrawler.crawlerParam and
// ReplyCrawler.crawlerResult (CT_TRANSLATE2 = 0 pairs with member 101).
//
// CT_TRANSLATE2 is the zero value, so a message whose crawlerType was never
// set reads back as CT_TRANSLATE2.
enum CrawlerType {
    // 4 and 14 are skipped in the numbering (as are 104 and 114 in the
    // request/reply oneofs). NOTE(review): presumably they belonged to
    // crawlers that were removed; they are reserved so that the numbers
    // cannot be reused with a different meaning.
    reserved 4, 14;

    // CT_TRANSLATE2 - translate (RequestTranslate2 / TranslateResult).
    CT_TRANSLATE2 = 0;
    // CT_CB_COMPANY - CrunchBase company.
    CT_CB_COMPANY = 1;
    // CT_DTDATA - DT data.
    CT_DTDATA = 2;
    // CT_ANALYZEPAGE - analyze page.
    CT_ANALYZEPAGE = 3;
    // CT_GEOIP - geoip.
    CT_GEOIP = 5;
    // CT_TECHINASIA - techinasia.
    CT_TECHINASIA = 6;
    // CT_STEEPANDCHEAP - steepandcheap.
    CT_STEEPANDCHEAP = 7;
    // CT_JRJ - jrj.
    CT_JRJ = 8;
    // CT_JD - jd.
    CT_JD = 9;
    // CT_ALIMAMA - alimama.
    CT_ALIMAMA = 10;
    // CT_MOUNTAINSTEALS - mountainsteals.
    CT_MOUNTAINSTEALS = 11;
    // CT_TMALL - tmall.
    CT_TMALL = 12;
    // CT_TAOBAO - taobao.
    CT_TAOBAO = 13;
    // CT_MANHUADB - manhuadb.
    CT_MANHUADB = 15;
    // CT_DOUBAN - douban.
    CT_DOUBAN = 16;
    // CT_TELEGRAPH - telegraph.
    CT_TELEGRAPH = 17;
    // CT_OABT - oabt.
    CT_OABT = 18;
    // CT_HAO6V - hao6v.
    CT_HAO6V = 19;
    // CT_PUBLICTRANSIT - publictransit.
    CT_PUBLICTRANSIT = 20;
    // CT_ARTICLE2 - article2.
    CT_ARTICLE2 = 21;
    // CT_6VDY - 6vdy (RequestP6vdy / ReplyP6vdy).
    CT_6VDY = 22;
    // CT_INVESTING - investing.
    CT_INVESTING = 23;
}

// RequestTranslate2 - translate parameters for the requestCrawler RPC; sent
// as RequestCrawler.translate2. It has the same fields as RequestTranslate
// except for the token, which RequestCrawler carries itself.
message RequestTranslate2 {
    // text - source text to translate.
    string text = 1;
    // platform - translation platform to use, e.g. google.
    string platform = 2;
    // srcLang - source language (code format not specified here).
    string srcLang = 3;
    // destLang - destination language (code format not specified here).
    string destLang = 4;
}

// TranslateResult - translation result; returned as
// ReplyCrawler.translateResult.
message TranslateResult {
    // text - translated (destination) text.
    string text = 1;
}

// AnalyzePage - parameters of a page analysis; sent as
// RequestCrawler.analyzePage and answered with ReplyAnalyzePage.
message AnalyzePage {
    // url - URL of the page to analyze.
    string url = 1;
    // delay - delay in seconds.
    // NOTE(review): what the delay precedes (e.g. before capture) is not
    // visible in this file.
    int32 delay = 2;
    // viewportWidth - viewport width (presumably in pixels - confirm).
    int32 viewportWidth = 3;
    // viewportHeight - viewport height (presumably in pixels - confirm).
    int32 viewportHeight = 4;
    // deviceScaleFactor - device scale factor of the viewport.
    float deviceScaleFactor = 5;
    // isMobile - whether the page is loaded as a mobile device.
    bool isMobile = 6;
    // isLandscape - whether the viewport is in landscape orientation.
    bool isLandscape = 7;
    // needScreenshots - whether screenshots are wanted in the reply
    // (see ReplyAnalyzePage.screenshots).
    bool needScreenshots = 8;
    // needLogs - whether console logs are wanted in the reply
    // (see ReplyAnalyzePage.logs).
    bool needLogs = 9;
    // timeout - timeout in seconds.
    int32 timeout = 10;
}

// AnalyzeScreenshotType - image format of an AnalyzeScreenshot.
// AST_JPG is the zero value, so an unset type reads back as AST_JPG.
enum AnalyzeScreenshotType {
    // AST_JPG - jpg
    AST_JPG = 0;
    // AST_PNG - png
    AST_PNG = 1;
}

// AnalyzeScreenshot - one screenshot produced by a page analysis
// (see ReplyAnalyzePage.screenshots).
message AnalyzeScreenshot {
    // name - screenshot name.
    string name = 1;
    // type - image format of buf.
    AnalyzeScreenshotType type = 2;
    // buf - screenshot image data.
    bytes buf = 3;
}

// AnalyzeReqInfo - information about one request observed during a page
// analysis (see ReplyAnalyzePage.reqs).
message AnalyzeReqInfo {
    // url - request URL.
    string url = 1;
    // downloadTime - download time (unit not specified in this file).
    int32 downloadTime = 2;
    // bufBytes - size of the buffer in bytes.
    int32 bufBytes = 3;
    // status - HTTP status.
    int32 status = 4;
    // startTime - start time (epoch/unit not specified in this file).
    int64 startTime = 5;
    // isGZip - whether the response is gzip.
    bool isGZip = 6;
    // contentType - Content-Type.
    string contentType = 7;
    // ipaddr - IP address list, e.g. 127.0.0.1;192.168.0.1;
    string ipaddr = 8;
    // remoteaddr - remote address, e.g. 127.0.0.1:443
    string remoteaddr = 9;

    // imgWidth - image width.
    // NOTE(review): presumably only set when the response is an image - confirm.
    int32 imgWidth = 100;
    // imgHeight - image height.
    // NOTE(review): presumably only set when the response is an image - confirm.
    int32 imgHeight = 101;
}

// ReplyAnalyzePage - result of a page analysis; returned as
// ReplyCrawler.analyzePage.
message ReplyAnalyzePage {
    // pageTime - page time (unit not specified in this file).
    int32 pageTime = 1;
    // pageBytes - page size in bytes.
    int32 pageBytes = 2;


    // errs - error list.
    repeated string errs = 100;
    // reqs - request list.
    repeated AnalyzeReqInfo reqs = 101;
    // screenshots - screenshot list.
    repeated AnalyzeScreenshot screenshots = 102;
    // logs - console.log list.
    repeated string logs = 103;
}

// CrawlerStatistics - crawler statistics, attached to a reply as
// ReplyCrawler.statistics.
message CrawlerStatistics {
    // pageNums - number of pages.
    int32 pageNums = 1;
    // byteNums - number of bytes.
    int32 byteNums = 2;
    // time - time spent (unit not specified in this file).
    int32 time = 3;
    // requestNums - number of requests.
    int32 requestNums = 4;
}

// RequestCrawler - request of the JarvisCrawlerService.requestCrawler RPC.
// crawlerType names the crawler to run and crawlerParam carries that
// crawler's parameters. Apart from RequestCrunchBaseCompany,
// RequestTranslate2 and AnalyzePage, the parameter types are not defined in
// this file (presumably they come from the imported per-site .proto files).
message RequestCrawler {
    // 104 and 114 are skipped in crawlerParam (matching the gaps at 4 and 14
    // in CrawlerType). NOTE(review): presumably they belonged to crawlers
    // that were removed; they are reserved so that the numbers cannot be
    // reused with a different meaning.
    reserved 104, 114;

    // token - access token of the caller.
    string token = 1;
    // crawlerType - crawler to run.
    CrawlerType crawlerType = 2;
    // timeout - request timeout (unit not specified in this file).
    int32 timeout = 3;

    // crawlerParam - parameters of the selected crawler; at most one member
    // is set. Member numbers are 100 + the CrawlerType value, except that
    // CT_TRANSLATE2 (0) uses 101 and CT_CB_COMPANY (1) uses 100.
    oneof crawlerParam {
        RequestCrunchBaseCompany cbCompany = 100;
        RequestTranslate2 translate2 = 101;
        RequestDTData dtdata = 102;
        AnalyzePage analyzePage = 103;
        RequestGeoIP geoip = 105;
        RequestTechInAsia techinasia = 106;
        RequestSteepAndCheap steepandcheap = 107;
        RequestJRJ jrj = 108;
        RequestJD jd = 109;
        RequestAlimama alimama = 110;
        RequestMountainSteals mountainsteals = 111;
        RequestTmall tmall = 112;
        RequestTaobao taobao = 113;
        RequestManhuaDB manhuadb = 115;
        RequestDouban douban = 116;
        RequestTelegraph telegraph = 117;
        RequestOABT oabt = 118;
        RequestHao6v hao6v = 119;
        RequestPublicTransit publictransit = 120;
        RequestArticle2 article2 = 121;
        RequestP6vdy p6vdy = 122;
        RequestInvesting investing = 123;
    }
}

// ReplyCrawler - result of a crawler run; carried in
// ReplyCrawlerStream.replyCrawler. crawlerResult holds the result of the
// crawler named by crawlerType. Apart from CrunchBaseOrganization,
// TranslateResult and ReplyAnalyzePage, the result types are not defined in
// this file (presumably they come from the imported per-site .proto files).
message ReplyCrawler {
    // 104 and 114 are skipped in crawlerResult (matching the gaps at 4 and
    // 14 in CrawlerType). NOTE(review): presumably they belonged to crawlers
    // that were removed; they are reserved so that the numbers cannot be
    // reused with a different meaning.
    reserved 104, 114;

    // crawlerType - crawler that produced this reply.
    CrawlerType crawlerType = 1;
    // statistics - statistics of the crawler run.
    CrawlerStatistics statistics = 2;
    // version - deprecated.
    // NOTE(review): presumably superseded by ReplyCrawlerStream.version -
    // confirm.
    string version = 3 [deprecated = true];

    // crawlerResult - result of the crawler; at most one member is set.
    // Member numbers mirror those of RequestCrawler.crawlerParam.
    oneof crawlerResult {
        CrunchBaseOrganization cbCompany = 100;
        TranslateResult translateResult = 101;
        ReplyDTData dtdata = 102;
        ReplyAnalyzePage analyzePage = 103;
        ReplyGeoIP geoip = 105;
        ReplyTechInAsia techinasia = 106;
        ReplySteepAndCheap steepandcheap = 107;
        ReplyJRJ jrj = 108;
        ReplyJD jd = 109;
        ReplyAlimama alimama = 110;
        ReplyMountainSteals mountainsteals = 111;
        ReplyTmall tmall = 112;
        ReplyTaobao taobao = 113;
        ReplyManhuaDB manhuadb = 115;
        ReplyDouban douban = 116;
        ReplyTelegraph telegraph = 117;
        ReplyOABT oabt = 118;
        ReplyHao6v hao6v = 119;
        ReplyPublicTransit publictransit = 120;
        ReplyArticle2 article2 = 121;
        ReplyP6vdy p6vdy = 122;
        ReplyInvesting investing = 123;
    }
}

// ReplyCrawlerStream - one message of the requestCrawler reply stream. A
// reply that is too long is sent as several messages; totalLength, curStart
// and curLength describe where this message's data sits in the whole payload.
message ReplyCrawlerStream {
    // totalLength - if the message is too long it is sent in multiple
    // messages; this is the total length.
    int32 totalLength = 1;
    // curStart - the starting point of the current data (in bytes).
    int32 curStart = 2;
    // curLength - the length of the current data (in bytes).
    int32 curLength = 3;
    // hashData - this is the hash of each paragraph.
    // NOTE(review): wording is shared with ReplyArticle; presumably the hash
    // of this message's data - confirm. Algorithm/encoding not specified here.
    string hashData = 4;
    // totalHashData - if multiple messages return data, this is the hash
    // value of all data; only sent in the last message.
    string totalHashData = 5;
    // data - payload bytes of the current message.
    // NOTE(review): presumably a slice of a serialized ReplyCrawler - confirm.
    bytes data = 6;

    // version - version string (presumably of the crawler service - confirm).
    string version = 7;

    // error - error message.
    // NOTE(review): presumably empty on success - confirm.
    string error = 100;

    // replyCrawler - the crawler reply.
    // NOTE(review): presumably set when the reply fits into a single message
    // instead of being sent through `data` - confirm.
    ReplyCrawler replyCrawler = 200;
}

// JarvisCrawlerService - gRPC service exposing the JarvisCrawler operations.
service JarvisCrawlerService {
    // translate - translate text; unary call.
    rpc translate(RequestTranslate) returns (ReplyTranslate);

    // exportArticle - export an article; the reply is a server-side stream
    // of ReplyArticle messages.
    rpc exportArticle(RequestArticle) returns (stream ReplyArticle);

    // getArticles - get a list of articles; unary call.
    rpc getArticles(RequestArticles) returns (ReplyArticles);

    // getDTData - get DT data; unary call. The request and reply types are
    // not defined in this file.
    rpc getDTData(RequestDTData) returns (ReplyDTData);

    // requestCrawler - run a crawler; the reply is a server-side stream of
    // ReplyCrawlerStream messages.
    rpc requestCrawler(RequestCrawler) returns (stream ReplyCrawlerStream);
}