Early Release

This evaluator reflects early-stage work. We’re continuously improving its accuracy and reliability.

Running the evaluator

This code runs the vocabulary evaluator once using LangChain. It makes an initial model call to generate a background knowledge assumption, then a second call that takes the background knowledge assumption, the intended grade level, the text, and the text's Flesch-Kincaid grade level and generates a complexity score.
from langchain_core.messages import SystemMessage

from langchain_core.output_parsers import JsonOutputParser

from langchain_core.prompts import ChatPromptTemplate

from langchain_core.prompts.chat import HumanMessagePromptTemplate

from langchain_google_genai import ChatGoogleGenerativeAI

from langchain_openai import ChatOpenAI

from pydantic import BaseModel, Field

from textstat import textstat as ts

# Prompts from the Prompts section saved as a separate file

from prompts import vocab_prompts as prompts


# Vocabulary complexity model: the chat model that produces the complexity
# rating. The model name and temperature are kept as named constants so they
# can be changed in one place.
VOCAB_MODEL = "gemini-2.5-pro"
VOCAB_TEMPERATURE = 0

vocab_complexity_model = ChatGoogleGenerativeAI(
    temperature=VOCAB_TEMPERATURE,
    model=VOCAB_MODEL,
)


# Student background knowledge model: the chat model that writes the
# background knowledge assumption fed into the complexity prompt.
BK_MODEL = "gpt-4o-2024-11-20"
BK_TEMPERATURE = 0

student_bk_model = ChatOpenAI(
    temperature=BK_TEMPERATURE,
    model=BK_MODEL,
)


# Add VocabularyComplexityAnswerOutput from the Prompts section here.


# Shared prompt configuration:
#   "inputVars"    - names of the variables supplied when the chain is invoked
#   "outputParser" - JSON parser built from VocabularyComplexityAnswerOutput;
#                    its format instructions are injected into the prompt
prompt_vars = {
    "inputVars": [
        "text",
        "student_grade_level",
        "student_background_knowledge",
        "fk_level",
    ],
    "outputParser": JsonOutputParser(
        pydantic_object=VocabularyComplexityAnswerOutput
    ),
}


def get_background_knowledge_assumption(text, grade):
    """
    Generate the student background knowledge assumption for a text.

    Fills the background knowledge prompt (``prompts.bk_prompt``) with the
    given text and grade, sends it to ``student_bk_model`` and returns the
    ``content`` of the model's response.
    """
    filled_prompt = prompts.bk_prompt.format(text=text, grade=grade)
    response = student_bk_model.invoke(filled_prompt)
    return response.content


def calculate_fk_score(text) -> float:
    """
    Return the Flesch-Kincaid Grade Level of ``text``, rounded to two
    decimal places.
    """
    raw_grade = ts.flesch_kincaid_grade(text)
    return round(raw_grade, 2)


def prepare_text_for_complexity_prediction(text, grade):
    """
    Assemble the inputs for the complexity prompt from the user's text and
    grade.

    Returns a dict with the keys ``text``, ``student_grade_level``,
    ``fk_level`` and ``student_background_knowledge``.
    """
    # The readability score is computed first, then the model is asked for
    # the background knowledge assumption.
    fk_level = calculate_fk_score(text)
    background_knowledge = get_background_knowledge_assumption(text, grade)

    return {
        "text": text,
        "student_grade_level": grade,
        "fk_level": fk_level,
        "student_background_knowledge": background_knowledge,
    }


def predict_text_complexity_level(text, grade):
    """
    Predict the text complexity level as well as the complex words and
    reasoning.

    Builds the prompt inputs with ``prepare_text_for_complexity_prediction``,
    renders the system and user prompts, sends them to
    ``vocab_complexity_model`` and parses the reply as JSON.

    Args:
        text: The passage to evaluate.
        grade: The intended student grade level.

    Returns:
        The parsed JSON output of the model. The fields are whatever the
        parser's format instructions ask for (presumably those declared on
        ``VocabularyComplexityAnswerOutput`` - confirm against that model).
    """
    dataset = prepare_text_for_complexity_prediction(text, grade)

    # Use the one configured parser for both the format instructions that go
    # into the prompt and the parsing of the reply, so the requested format
    # and the parsing step cannot drift apart.
    output_parser = prompt_vars["outputParser"]

    # Use system and user prompts from the prompts section.
    messages = [
        SystemMessage(content=prompts.system),
        HumanMessagePromptTemplate.from_template(prompts.user),
    ]

    # Prepare prompt
    prompt = ChatPromptTemplate(
        messages,
        input_variables=prompt_vars["inputVars"],
        partial_variables={
            "format_instructions": output_parser.get_format_instructions(),
        },
    )
    chain = prompt | vocab_complexity_model | output_parser

    # Invoke the chain
    return chain.invoke(dataset)


# The call below is a single run of the evaluator against one input. In
# practice, we recommend running this function 3+ times per input and taking
# the majority complexity rating across all runs.

text = """

Lasers have found many uses in everyday life as well as in industry. Lasers are found in CD and DVD players, where they read the code from the disk that stores a song or movie. A laser is often used to read the bar codes or SQR codes on things sold in a store, to identify a product and give its price. Lasers are used in medicine, particularly in LASIK eye surgery, where the laser is used to repair the shape of the cornea. It is used in chemistry with spectroscopy to identify materials, to find out what kind of gases, solids or liquids something is made of. Stronger lasers can be used to cut metal.

Lasers are used to measure the distance of the Moon from Earth by reflecting off reflectors left by the Apollo missions. By measuring the time it takes for the light to travel to the Moon and back again we can find out exactly how far away the moon is.

"""

# Intended student grade level for the sample passage.
grade_level = 3

predict_text_complexity_level(text, grade_level)