Early Release

This evaluator reflects early-stage work. We’re continuously improving its accuracy and reliability.
The following example Python code runs the Grade Level Appropriateness Evaluator once using LangChain.
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts.chat import HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

system_prompt = """
  <<Insert the system prompt from the Prompts section here>>
"""

user_prompt = """
  <<Insert the user prompt from the Prompts section here>>
"""

formatting_instructions = """
  <<Insert formatting instructions from the Prompts section here>>
"""

def run_gla_evaluator(text):
    # Construct the input data for the LLM invocation
    input_data = {
        "text": text
    }

    # Set the model to use
    model_name = 'gemini-2.5-pro'
    # Connect to the model. Requires GOOGLE_API_KEY to be set in the environment.
    llm = ChatGoogleGenerativeAI(model=model_name, temperature=0.25)
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessagePromptTemplate.from_template("""
            {user_prompt}
            <begin of text to evaluate>
            {text}
            <end of text to evaluate>\n
            {format_instructions}
        """)
    ]

    # Prepare the prompt template
    prompt = ChatPromptTemplate(
        messages,
        input_variables=["text"],
        partial_variables={
            "format_instructions": formatting_instructions,
            "user_prompt": user_prompt,
        }
    )
    chain = prompt | llm | JsonOutputParser()

    # Invoke the chain and return the parsed JSON output
    output = chain.invoke(input_data)
    return output

# Run the evaluator once:
text = """
  <<Insert your text to evaluate here>>
"""
output = run_gla_evaluator(text)

print(
  f"""
    Target grade band is: {output['grade']}\n
    Alternative grade band is: {output['alternative_grade']}\n
    Recommended scaffolding for alternative grade band is: {output['scaffolding_needed']}\n
    Reasoning: \n{output['reasoning']}\n
  """
)

Evaluating a series of texts

For code to run the evaluator over a series of texts, see the Tutorial.
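The Tutorial covers this in full, but as a rough sketch you can loop over several texts and collect the results. The sketch below assumes the run_gla_evaluator function and prompts defined above; texts_to_evaluate is a hypothetical placeholder list that you would fill with your own texts.

# Minimal sketch: evaluate several texts in sequence.
texts_to_evaluate = [
    "<<Insert your first text to evaluate here>>",
    "<<Insert your second text to evaluate here>>",
]

results = []
for i, sample in enumerate(texts_to_evaluate, start=1):
    output = run_gla_evaluator(sample)
    results.append(output)
    print(f"Text {i}: target grade band {output['grade']}, "
          f"alternative grade band {output['alternative_grade']}")

Each iteration makes a separate model call, so for larger test sets the batched approach in the Tutorial remains the recommended path.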