UNPKG

budoux/module/html_processor.d.ts

Version:

6.09 kBTypeScriptView Raw

/**
* @license
* Copyright 2021 Google LLC
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Parser } from './parser.js';
/**
 * Represents a node in {@link Paragraph}.
 *
 * It wraps either a DOM {@link Text} node or a plain `string`.
 *
 * A `string` provides the context for the parser, but it can't be split.
 */
declare class NodeOrText {
  /** The wrapped {@link Text} node or plain `string`. */
  nodeOrText: Text | string;
  /**
   * The pieces that {@link split} divides the {@link Text} into.
   * Joining all of them must be equal to {@link text}.
   */
  chunks: string[];
  /**
   * NOTE(review): presumably marks that a break opportunity follows this
   * node; confirm against the implementation.
   */
  hasBreakOpportunityAfter: boolean;
  /**
   * @param nodeOrText The {@link Text} node or `string` to wrap.
   */
  constructor(nodeOrText: Text | string);
  /**
   * NOTE(review): presumably `true` when the wrapped value is a `string`
   * rather than a {@link Text}; confirm against the implementation.
   */
  get isString(): boolean;
  /**
   * Whether this node can be split. Per the class description, a wrapped
   * `string` can't be split.
   */
  get canSplit(): boolean;
  /** The text of the wrapped value; the declared type allows `null`. */
  get text(): string | null;
  /**
   * NOTE(review): presumably the length of {@link text}; confirm against
   * the implementation.
   */
  get length(): number;
  /**
   * Split the {@link Text} in the same way as the {@link chunks}.
   * Joining all {@link chunks} must be equal to {@link text}.
   *
   * @param separator A `string` or a {@link Node}.
   * NOTE(review): presumably placed between the resulting pieces; confirm.
   */
  split(separator: string | Node): void;
}
/**
 * Exported subclass of {@link NodeOrText} that declares no members of its own.
 *
 * NOTE(review): {@link NodeOrText} itself is not exported from this module,
 * so this presumably exists to make it reachable from tests; confirm.
 */
export declare class NodeOrTextForTesting extends NodeOrText {
}
/**
 * Represents a "paragraph", broken by block boundaries or forced breaks.
 *
 * A CSS
 * {@link https://drafts.csswg.org/css2/#inline-formatting inline formatting context}
 * is usually a "paragraph", but it can be broken into multiple paragraphs by
 * forced breaks such as `<br>`.
 */
declare class Paragraph {
  /**
   * The element given to the constructor.
   * NOTE(review): presumably the element that contains this paragraph; confirm.
   */
  element: HTMLElement;
  /** The {@link NodeOrText} items belonging to this paragraph. */
  nodes: NodeOrText[];
  /**
   * @param element The element associated with this paragraph.
   */
  constructor(element: HTMLElement);
  /**
   * NOTE(review): presumably reports whether {@link nodes} holds anything to
   * process; confirm against the implementation.
   */
  isEmpty(): boolean;
  /**
   * NOTE(review): presumably the concatenated text of {@link nodes}; confirm.
   */
  get text(): string;
  /** The last entry of {@link nodes}, or `undefined` per the declared type. */
  get lastNode(): NodeOrText | undefined;
  /**
   * NOTE(review): presumably sets `hasBreakOpportunityAfter` on
   * {@link lastNode}; confirm against the implementation.
   */
  setHasBreakOpportunityAfter(): void;
  /**
   * @returns Indices of forced break opportunities in the source.
   * They can be created by `<wbr>` tag or `&ZeroWidthSpace;`.
   */
  getForcedOpportunities(): number[];
  /**
   * @param boundaries The candidate boundary indices to filter.
   * @returns `boundaries` with the indices from
   * {@link getForcedOpportunities} excluded if that list is not empty.
   * Otherwise `boundaries`.
   */
  excludeForcedOpportunities(boundaries: number[]): number[];
}
/**
 * Exported subclass of {@link Paragraph} that declares no members of its own.
 *
 * NOTE(review): {@link Paragraph} itself is not exported from this module,
 * so this presumably exists to make it reachable from tests; confirm.
 */
export declare class ParagraphForTesting extends Paragraph {
}
/**
 * Options for {@link HTMLProcessor}.
 */
export interface HTMLProcessorOptions {
  /**
   * This class name is added to the containing block when BudouX is applied.
   * The containing block should have the following CSS properties to make it
   * work.
   * `{ word-break: keep-all; overflow-wrap: anywhere; }`
   *
   * When falsy, an inline style is set instead.
   */
  className?: string;
  /**
   * The separator to insert at each semantic boundary.
   *
   * When it's a {@link Node}, a clone of the {@link Node} will be inserted.
   *
   * The default value is U+200B ZERO WIDTH SPACE.
   */
  separator?: string | Node;
}
/**
 * Adds HTML processing support to a BudouX {@link Parser}.
 */
export declare class HTMLProcessor {
  /**
   * NOTE(review): presumably holds the {@link Parser} given to the
   * constructor; confirm against the implementation.
   */
  private parser_;
  /** See {@link HTMLProcessorOptions.className}. */
  className?: string;
  /** See {@link HTMLProcessorOptions.separator}. */
  separator: string | Node;
  /**
   * @param parser A BudouX {@link Parser} to compute semantic line breaks.
   * @param options Optional {@link HTMLProcessorOptions}.
   */
  constructor(parser: Parser, options?: HTMLProcessorOptions);
  /**
   * Checks if the given element has a text node in its children.
   *
   * @param ele An element to be checked.
   * @returns Whether the element has a child text node.
   */
  static hasChildTextNode(ele: HTMLElement): boolean;
  /**
   * Applies markups for semantic line breaks to the given HTML element.
   *
   * It breaks descendant nodes into paragraphs,
   * and applies the BudouX to each paragraph.
   *
   * @param element The input element.
   */
  applyToElement(element: HTMLElement): void;
  /**
   * Find paragraphs from a given HTML element.
   *
   * @param element The root element to find paragraphs.
   * @param parent The parent {@link Paragraph} if any.
   * @returns An iterator over the {@link Paragraph}s found.
   */
  getBlocks(element: HTMLElement, parent?: Paragraph): IterableIterator<Paragraph>;
  /**
   * Apply the BudouX to the given {@link Paragraph}.
   *
   * @param paragraph The {@link Paragraph} to apply.
   */
  applyToParagraph(paragraph: Paragraph): void;
  /**
   * Split {@link NodeOrText} at the specified boundaries.
   *
   * @param nodes A list of {@link NodeOrText}.
   * @param boundaries A list of indices of the text to split at.
   */
  splitNodes(nodes: NodeOrText[], boundaries: number[]): void;
  /**
   * Applies the block style to the given element.
   * See {@link HTMLProcessorOptions.className} for the class name or inline
   * style involved.
   *
   * @param element The element to apply the block style.
   */
  applyBlockStyle(element: HTMLElement): void;
}
/**
 * BudouX {@link Parser} with HTML processing support.
 */
export declare class HTMLProcessingParser extends Parser {
  /** The {@link HTMLProcessor} used by this parser. */
  htmlProcessor: HTMLProcessor;
  /**
   * @param model A two-level map from string keys to numbers.
   * NOTE(review): presumably the BudouX model handed to {@link Parser}; confirm.
   * @param htmlProcessorOptions Optional {@link HTMLProcessorOptions}.
   */
  constructor(model: {
    [key: string]: {
      [key: string]: number;
    };
  }, htmlProcessorOptions?: HTMLProcessorOptions);
  /**
   * Applies markups for semantic line breaks to the given HTML element.
   *
   * @param parentElement The input element.
   * @deprecated Use `applyToElement` instead. `applyElement` will be removed
   * in v0.7.0 to align the function name with `HTMLProcessor`'s API.
   */
  applyElement(parentElement: HTMLElement): void;
  /**
   * Applies markups for semantic line breaks to the given HTML element.
   *
   * @param parentElement The input element.
   */
  applyToElement(parentElement: HTMLElement): void;
  /**
   * Translates the given HTML string to another HTML string with markups
   * for semantic line breaks.
   *
   * @param html An input html string.
   * @returns The translated HTML string.
   */
  translateHTMLString(html: string): string;
}
// Empty export clause: adds no exported names of its own.
export {};

1	`/**`
2	`* @license`
3	`* Copyright 2021 Google LLC`
4	`* Licensed under the Apache License, Version 2.0 (the "License");`
5	`* you may not use this file except in compliance with the License.`
6	`* You may obtain a copy of the License at`
7	`*`
8	`* https://www.apache.org/licenses/LICENSE-2.0`
9	`*`
10	`* Unless required by applicable law or agreed to in writing, software`
11	`* distributed under the License is distributed on an "AS IS" BASIS,`
12	`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
13	`* See the License for the specific language governing permissions and`
14	`* limitations under the License.`
15	`*/`
16	`import { Parser } from './parser.js';`
17	`/**`
18	`* Represents a node in {@link Paragraph}.`
19	`*`
20	`* It wraps a {@link Text} or a {@link string}.`
21	`*`
22	`* A {@link string} provides the context for the parser, but it can't be split.`
23	`*/`
24	`declare class NodeOrText {`
25	`nodeOrText: Text \| string;`
26	`chunks: string[];`
27	`hasBreakOpportunityAfter: boolean;`
28	`constructor(nodeOrText: Text \| string);`
29	`get isString(): boolean;`
30	`get canSplit(): boolean;`
31	`get text(): string \| null;`
32	`get length(): number;`
33	`/**`
34	`* Split the {@link Text} in the same way as the {@link chunks}.`
35	`* Joining all {@link chunks} must be equal to {@link text}.`
36	`*/`
37	`split(separator: string \| Node): void;`
38	`}`
39	`export declare class NodeOrTextForTesting extends NodeOrText {`
40	`}`
41	`/**`
42	`* Represents a "paragraph", broken by block boundaries or forced breaks.`
43	`*`
44	`* A CSS`
45	`* {@link https://drafts.csswg.org/css2/#inline-formatting inline formatting context}`
46	`* is usually a "paragraph", but it can be broken into multiple paragraphs by`
47	``* forced breaks such as `<br>`.``
48	`*/`
49	`declare class Paragraph {`
50	`element: HTMLElement;`
51	`nodes: NodeOrText[];`
52	`constructor(element: HTMLElement);`
53	`isEmpty(): boolean;`
54	`get text(): string;`
55	`get lastNode(): NodeOrText \| undefined;`
56	`setHasBreakOpportunityAfter(): void;`
57	`/**`
58	`* @return Indices of forced break opportunities in the source.`
59	``* They can be created by `<wbr>` tag or `&ZeroWidthSpace;`.``
60	`*/`
61	`getForcedOpportunities(): number[];`
62	`/**`
63	`* @return Filtered {@param boundaries} by excluding`
64	`* {@link getForcedOpportunities} if it's not empty.`
65	`* Otherwise {@param boundaries}.`
66	`*/`
67	`excludeForcedOpportunities(boundaries: number[]): number[];`
68	`}`
69	`export declare class ParagraphForTesting extends Paragraph {`
70	`}`
71	`/**`
72	`* Options for {@link HTMLProcessor}.`
73	`*/`
74	`export interface HTMLProcessorOptions {`
75	`/**`
76	`* This class name is added to the containing block when the BudouX is applied.`
77	`* The containing block should have following CSS properties to make it work.`
78	`` * `{ word-break: keep-all; overflow-wrap: anywhere; }` ``
79	`*`
80	`* When falsy, an inline style is set instead.`
81	`*/`
82	`className?: string;`
83	`/**`
84	`* The separator to insert at each semantics boundary.`
85	`*`
86	`* When it's a {@link Node}, a clone of the {@link Node} will be inserted.`
87	`*`
88	`* The default value is U+200B ZERO WIDTH SPACE.`
89	`*/`
90	`separator?: string \| Node;`
91	`}`
92	`/**`
93	`* Adds HTML processing support to a BudouX {@link Parser}.`
94	`*/`
95	`export declare class HTMLProcessor {`
96	`private parser_;`
97	`/** See {@link HTMLProcessorOptions.className}. */`
98	`className?: string;`
99	`/** See {@link HTMLProcessorOptions.separator}. */`
100	`separator: string \| Node;`
101	`/**`
102	`* @param parser A BudouX {@link Parser} to compute semantic line breaks.`
103	`*/`
104	`constructor(parser: Parser, options?: HTMLProcessorOptions);`
105	`/**`
106	`* Checks if the given element has a text node in its children.`
107	`*`
108	`* @param ele An element to be checked.`
109	`* @return Whether the element has a child text node.`
110	`*/`
111	`static hasChildTextNode(ele: HTMLElement): boolean;`
112	`/**`
113	`* Applies markups for semantic line breaks to the given HTML element.`
114	`*`
115	`* It breaks descendant nodes into paragraphs,`
116	`* and applies the BudouX to each paragraph.`
117	`* @param element The input element.`
118	`*/`
119	`applyToElement(element: HTMLElement): void;`
120	`/**`
121	`* Find paragraphs from a given HTML element.`
122	`* @param element The root element to find paragraphs.`
123	`* @param parent The parent {@link Paragraph} if any.`
124	`* @return A list of {@link Paragraph}s.`
125	`*/`
126	`getBlocks(element: HTMLElement, parent?: Paragraph): IterableIterator<Paragraph>;`
127	`/**`
128	`* Apply the BudouX to the given {@link Paragraph}.`
129	`* @param paragraph The {@link Paragraph} to apply.`
130	`*/`
131	`applyToParagraph(paragraph: Paragraph): void;`
132	`/**`
133	`* Split {@link NodeOrText} at the specified boundaries.`
134	`* @param nodes A list of {@link NodeOrText}.`
135	`* @param boundaries A list of indices of the text to split at.`
136	`*/`
137	`splitNodes(nodes: NodeOrText[], boundaries: number[]): void;`
138	`/**`
139	`* Applies the block style to the given element.`
140	`* @param element The element to apply the block style.`
141	`*/`
142	`applyBlockStyle(element: HTMLElement): void;`
143	`}`
144	`/**`
145	`* BudouX {@link Parser} with HTML processing support.`
146	`*/`
147	`export declare class HTMLProcessingParser extends Parser {`
148	`htmlProcessor: HTMLProcessor;`
149	`constructor(model: {`
150	`[key: string]: {`
151	`[key: string]: number;`
152	`};`
153	`}, htmlProcessorOptions?: HTMLProcessorOptions);`
154	`/**`
155	``* @deprecated Use `applyToElement` instead. `applyElement` will be removed``
156	``* in v0.7.0 to align the function name with `HTMLProcessor`'s API.``
157	`*`
158	`* Applies markups for semantic line breaks to the given HTML element.`
159	`* @param parentElement The input element.`
160	`*/`
161	`applyElement(parentElement: HTMLElement): void;`
162	`/**`
163	`* Applies markups for semantic line breaks to the given HTML element.`
164	`* @param parentElement The input element.`
165	`*/`
166	`applyToElement(parentElement: HTMLElement): void;`
167	`/**`
168	`* Translates the given HTML string to another HTML string with markups`
169	`* for semantic line breaks.`
170	`* @param html An input html string.`
171	`* @return The translated HTML string.`
172	`*/`
173	`translateHTMLString(html: string): string;`
174	`}`
175	`export {};`