Software engineer and tinkerer.

My Readlang Importer

# Read all input from STDIN
input = $stdin.read

# Regular expression to match the format '{{c1::<korean>::<english>}}'
regex = /\{\{c1::(.*?)::(.*?)\}\}/

# Array to hold matches
translations = []

# Find all matches and append to the translations array
input.scan(regex) do |match|
  korean, english = match
  # Crash if any match does not contain both parts
  if korean.nil? || english.nil?
    raise "Input format error: Expected '{{c1::<korean>::<english>}}'"
  end
  translations << "\"#{korean}\" / \"#{english}\""
end

# Ensure we have matches, otherwise raise an error
raise "No matches found, input may not be in the expected format." if translations.empty?

cleaned = translations.sort.uniq.filter do |t|
    ko = t.split(' / ').first
    (ko.length > 6) && ko.include?(" ") && (ko.length < 26)
  end.sort_by { |t| t.split(' / ').first.length }

puts cleaned