Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History

readme.md

Outline

Schema of Report

Text Classification

    # Report for a text-classification dataset. Aggregate statistics are stored
    # under "dataset-level"; per-sample records are stored under "sample-level".
    res = {
            "dataset-level":{
                # Maximum, minimum and mean of `lengths`
                # (presumably one text length per sample; unit not shown here - confirm).
                "length_info": {
                    "max_text_length": np.max(lengths),
                    "min_text_length": np.min(lengths),
                    "average_text_length": np.average(lengths),
                },
                "label_info": {
                    # Smallest value of `labels_to_number` divided by the largest;
                    # the `* 1.0` keeps the division floating-point. Equals 1.0 when
                    # all values are equal. Presumably the mapping is label -> sample
                    # count, making this a class-balance indicator - confirm.
                    "ratio":min(labels_to_number.values()) * 1.0 / max(labels_to_number.values()),
                    "distribution": labels_to_number,
                },
                "gender_info":gender_ratio,
                # "vocabulary_info" is disabled in this report (entry left commented out).
                # "vocabulary_info":vocab_sorted,
                "number_of_samples":len(samples),
                "number_of_tokens":number_of_tokens,
                "hatespeech_info":hatespeech,
                # Stored as a count (length of `spelling_errors`), not the errors themselves.
                "spelling_errors":len(spelling_errors),
            },
        # Per-sample information; its structure is defined where `sample_infos` is built.
        "sample-level":sample_infos
    }

image

Text Pair Classification

    # Report for a text-pair classification dataset. Aggregate statistics are
    # stored under "dataset-level"; per-sample records under "sample-level".
    res = {
            "dataset-level":{
                # Maximum, minimum and mean computed separately for the first and
                # second text of each pair (`text1_lengths`, `text2_lengths`).
                "length_info": {
                    "max_text1_length": np.max(text1_lengths),
                    "min_text1_length": np.min(text1_lengths),
                    "average_text1_length": np.average(text1_lengths),
                    "max_text2_length": np.max(text2_lengths),
                    "min_text2_length": np.min(text2_lengths),
                    "average_text2_length": np.average(text2_lengths),
                    # Mean of `text1_divided_text2` (presumably the per-pair
                    # text1/text2 length ratio - confirm against the producer).
                    "text1_divided_text2":np.average(text1_divided_text2),
                },
                "label_info": {
                    # Smallest value of `labels_to_number` divided by the largest;
                    # the `* 1.0` keeps the division floating-point. Equals 1.0 when
                    # all values are equal.
                    "ratio": min(labels_to_number.values()) * 1.0 / max(labels_to_number.values()),
                    "distribution": labels_to_number,
                },
                "vocabulary_info":vocab_sorted,
                "number_of_samples": len(samples),
                "number_of_tokens": number_of_tokens,
                "gender_info": gender_ratio,
                # Mean of `similarities` (presumably one similarity score per pair - confirm).
                "average_similarity": np.average(similarities),
                "hatespeech_info": hatespeech,
            },
        # Per-sample information; its structure is defined where `sample_infos` is built.
        "sample-level": sample_infos
    }

image

Named Entity Recognition

    # Report for a named-entity-recognition dataset. Aggregate statistics are
    # stored under "dataset-level"; per-sample records under "sample-level".
    res = {
        "dataset-level": {
            # Entity-specific statistics; only present in the NER report.
            "entity_info":{
                "avg_entity_length": avg_entityLen,
                "avg_entity_on_sentence": avg_entity_nums_inSent,
                # NOTE(review): computed as len(samples) - len(chunks); this is the
                # number of entity-free sentences only if `chunks` holds exactly one
                # entry per sentence that contains an entity - confirm.
                "sentence_without_entity": len(samples) - len(chunks),
                "entity_length_distribution": entity_length_distribution,
            },
            # Maximum, minimum and mean of `lengths`.
            "length_info": {
                "max_text_length": np.max(lengths),
                "min_text_length": np.min(lengths),
                "average_text_length": np.average(lengths),
            },
            "label_info": {
                # Smallest value of `labels_to_number` divided by the largest;
                # the `* 1.0` keeps the division floating-point.
                "ratio": min(labels_to_number.values()) * 1.0 / max(labels_to_number.values()),
                # NOTE(review): "distribution" reads `label_distribution` while "ratio"
                # above reads `labels_to_number`; verify the two describe the same labels.
                "distribution": label_distribution, #labels_to_number,
            },
            "gender_info": gender_ratio,
            "vocabulary_info":vocab_sorted,
            "number_of_samples": len(samples),
            "number_of_tokens": number_of_tokens,
            "hatespeech_info": hatespeech,
        },
        # Per-sample information; its structure is defined where `sample_infos` is built.
        "sample-level": sample_infos
    }

Screenshot 2022-01-18 at 4.20.06 PM

Summarization

    # Report for a summarization dataset. Aggregate statistics are stored under
    # "dataset-level"; per-sample records under "sample-level".
    res = {
        "dataset-level":{
                # These two averages are also emitted inside "length_info" below,
                # computed from the same expressions.
                "average_text_length":np.average(text_lengths),
                "average_summary_length":np.average(summary_lengths),
                # Maximum, minimum and mean for source texts and for summaries.
                "length_info": {
                    "max_text_length": np.max(text_lengths),
                    "min_text_length": np.min(text_lengths),
                    "average_text_length": np.average(text_lengths),
                    "max_summary_length": np.max(summary_lengths),
                    "min_summary_length": np.min(summary_lengths),
                    "average_summary_length": np.average(summary_lengths),
                },
                "number_of_samples": len(samples),
                "number_of_tokens": number_of_tokens,
                "vocabulary_info": vocab_sorted,
                "gender_info": gender_ratio,
                "hatespeech_info": hatespeech,
                # Merges every key of `attr_avg` into "dataset-level"; the key set is
                # not visible in this snippet. Because the unpacking comes last, a key
                # in `attr_avg` that matches one listed above replaces that value.
                **attr_avg,
        },
        # Per-sample information; its structure is defined where `sample_infos` is built.
        "sample-level": sample_infos,
    }
Morty Proxy This is a proxified and sanitized view of the page, visit original site.