Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

ptechen/Capricorn

Open more actions menu

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

86 Commits
86 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Capricorn

Parse HTML according to configuration.

Capricorn is an HTML parsing library that supports recursion and a custom execution order.

Version info Downloads docs example branch parameter dependency status

Default execution order

vec![String::from("selects"),
    String::from("each"),
    String::from("select_params"),
    String::from("nodes"),
    String::from("has"),
    String::from("contains")];
    
selects > each > (one or all or fields) > ... text_attr_html > (text or attr or html);
selects > select_params > selects > ... text_attr_html > (text or attr or html);
selects > nodes > has > contains > text_attr_html > (text or attr or html);

Support:

Capricorn support | example | value type
selects element field_name:
  selects:
      - element_name
String
selects class field_name:
  selects:
      - .class_name
String
selects class element field_name:
  selects:
      - .class_name
      - element_name
String
first field_name:
  selects:
      - element_name
  nodes:
      first: true
String
last field_name:
  selects:
      - element_name
  nodes:
      last: true
String
eq field_name:
  selects:
      - element_name
  nodes:
      eq: 0
String
parent field_name:
  selects:
      - element_name
  nodes:
      parent: true
String
children field_name:
  selects:
      - element_name
  nodes:
      children: true
String
prev_sibling field_name:
  selects:
      - element_name
  nodes:
      prev_sibling: true
String
next_sibling field_name:
  selects:
      - element_name
  nodes:
      next_sibling: true
String
has_class field_name:
  selects:
      - element_name
  has:
      class: class_name
String
has_attr field_name:
  selects:
      - element_name
  has:
      attr: attr_name
String
each one field_name:
  selects:
      - element_name
  each:
      one:
          selects:
              - .class_name
          ...
String
each all field_name:
  selects:
      - element_name
  each:
      all:
          selects:
              - .class_name
          ...
Array
each fields field_name:
  selects:
      - element_name
  each:
      fields:
        field_name:
          selects:
              - .class_name
          ...
        field_name1:
          selects:
              - .class_name
          ...
Map
select_params field_name:
  selects:
      - element_name
  select_params:
      selects:
          - .class_name
      ...
...
text field_name:
  selects:
      - element_name
  text_attr_html:
      text: true
String
attr field_name:
  selects:
      - element_name
  text_attr_html:
      attr: true
String
html field_name:
  selects:
      - element_name
  text_attr_html:
      html: true
String
text contains field_name:
  selects:
      - element_name
  contains:
      contains:
          text:
              - test
String
text not contains field_name:
  selects:
      - element_name
  contains:
      not_contains:
          text:
              - test
String
html contains field_name:
  selects:
      - element_name
  contains:
      contains:
          html:
              - test
String
html not contains field_name:
  selects:
      - element_name
  contains:
      not_contains:
          html:
              - test
String
exec order field_name:
  exec_order:
      - selects
      - has
      - nodes
  selects:
      - element_name
  has:
      class: class_name
  nodes:
      first: true
String
data format splits field_name:
  selects:
      - element_name
  data_format:
      splits:
          - { key: str }
Array
data format splits field_name:
  selects:
      - element_name
  data_format:
      splits:
          - { key: str, index: 0 }
String
data format replaces field_name:
  selects:
      - element_name
  data_format:
      replaces:
          - str
String
data format deletes field_name:
  selects:
      - element_name
  data_format:
      deletes:
          - str
String
data format find field_name:
  selects:
      - element_name
  data_format:
      find:
        - regex
String
data format find_iter field_name:
  selects:
      - element_name
  data_format:
      find_iter:
        - regex
Array
Multi-version regular matching err regexes_match_parse_html:
    - regex: regex
      version: 1
      err: err_msg
Err
Multi-version regular matching fields regexes_match_parse_html:
    - regex: regex
      version: 1
      fields:
        field_name:
          selects:
          ...
        field_name:
          selects:
          ...
Map
let yml = read_file("./test_html/test.yml").unwrap();
let params: parse::HashMapSelectParams = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = parse::parse_html(&params, &html);
let yml = read_file("./test_html/regexes_match_parse_html.yml").unwrap();
let v:  match_html::MatchHtmlVec = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r =  v.regexes_match_parse_html(html)?;

About

Parse html according to configuration

Topics

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors

Morty Proxy This is a proxified and sanitized view of the page, visit original site.