From 0dde58d6844598d2fc08636009cf65f8917d1f9d Mon Sep 17 00:00:00 2001
From: zachary62 <zhuang333@wisc.edu>
Date: Fri, 27 Dec 2024 05:29:24 +0000
Subject: [PATCH] add examples

---
 .gitignore                               |   3 +-
 cookbook/demo.ipynb                      | 267 ++++++++++++++++
 data/PaulGrahamEssaysLarge/addiction.txt | 116 +++++++
 data/PaulGrahamEssaysLarge/aord.txt      | 126 ++++++++
 data/PaulGrahamEssaysLarge/apple.txt     | 201 ++++++++++++
 data/PaulGrahamEssaysLarge/avg.txt       | 375 ++++++++++++++++++++++
 data/PaulGrahamEssaysLarge/before.txt    | 387 +++++++++++++++++++++++
 docs/prompt                              | 223 ++++++++++++-
 minillmflow/__init__.py                  |  30 +-
 setup.py                                 |   2 +-
 tests/test_async_batch_flow.py           |  10 +-
 tests/test_async_flow.py                 |  18 +-
 tests/test_batch_flow.py                 |  12 +-
 tests/test_batch_node.py                 |  10 +-
 tests/test_flow_basic.py                 |  10 +-
 tests/test_flow_composition.py           |  22 +-
 16 files changed, 1753 insertions(+), 59 deletions(-)
 create mode 100644 cookbook/demo.ipynb
 create mode 100644 data/PaulGrahamEssaysLarge/addiction.txt
 create mode 100644 data/PaulGrahamEssaysLarge/aord.txt
 create mode 100644 data/PaulGrahamEssaysLarge/apple.txt
 create mode 100644 data/PaulGrahamEssaysLarge/avg.txt
 create mode 100644 data/PaulGrahamEssaysLarge/before.txt

diff --git a/.gitignore b/.gitignore
index 13a1aac..884a33a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,4 +71,5 @@ htmlcov/
 *.temp
 
 
-test.ipynb
\ No newline at end of file
+test.ipynb
+.pytest_cache/
\ No newline at end of file
diff --git a/cookbook/demo.ipynb b/cookbook/demo.ipynb
new file mode 100644
index 0000000..a5a6f3c
--- /dev/null
+++ b/cookbook/demo.ipynb
@@ -0,0 +1,267 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No relevant file found: the question has no relevant file because while some files discuss startups, none specifically address how to find or generate startup ideas\n",
+      "No question asked\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'default'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Example App for text summarization & QA using minillmflow\n",
+    "from minillmflow import Node, BatchNode, Flow, BatchFlow, AsyncNode, AsyncFlow, BatchAsyncFlow\n",
+    "import os\n",
+    "\n",
+    "# 1) Implement a simple LLM helper (OpenAI in this example).\n",
+    "def call_llm(prompt):\n",
+    "    # Users must set an OpenAI API key; can also load from env var, etc.\n",
+    "    openai.api_key = \"YOUR_API_KEY_HERE\"\n",
+    "    r = openai.ChatCompletion.create(\n",
+    "        model=\"gpt-4\",\n",
+    "        messages=[{\"role\": \"user\", \"content\": prompt}]\n",
+    "    )\n",
+    "    return r.choices[0].message.content\n",
+    "\n",
+    "# 2) Create a shared store (dict) for Node/Flow data exchange.\n",
+    "#    This can be replaced with a DB or other storage.\n",
+    "#    Design the structure / schema based on the app requirements.\n",
+    "shared = {\"data\": {}, \"summary\": {}}\n",
+    "\n",
+    "# 3) Create a Node that loads data from disk into shared['data'].\n",
+    "class LoadData(Node):\n",
+    "    # For compute-intensive operations, do them in prep().\n",
+    "    def prep(self, shared):\n",
+    "        path = \"../data/PaulGrahamEssaysLarge\"\n",
+    "        for filename in os.listdir(path):\n",
+    "            with open(os.path.join(path, filename), 'r') as f:\n",
+    "                shared['data'][filename] = f.read()\n",
+    "    # If LLM was needed, we'd handle it in exec(). Not needed here.\n",
+    "    # (idempotent so it can be retried if needed)\n",
+    "    def exec(self,shared,prep_res): pass \n",
+    "    # post() can update shared again or decide the next node (by returning the action).\n",
+    "    def post(self,shared,prep_res,exec_res): pass \n",
+    "\n",
+    "load_data = LoadData()\n",
+    "# Run the data-loading node once\n",
+    "load_data.run(shared)\n",
+    "\n",
+    "# 4) Create a Node that summarizes a single file using the LLM.\n",
+    "class SummarizeFile(Node):\n",
+    "    def prep(self, shared):\n",
+    "        # Use self.params (which must remain immutable during prep/exec/post).\n",
+    "        # Typically, we only store identifying info in params (e.g., filename).\n",
+    "        content = shared['data'][self.params['filename']]\n",
+    "        return content\n",
+    "    def exec(self, shared, prep_res):\n",
+    "        content = prep_res\n",
+    "        prompt = f\"{content} Respond a summary of above in 10 words\"\n",
+    "        summary = call_llm(prompt)\n",
+    "        return summary\n",
+    "    def post(self, shared, prep_res, exec_res):\n",
+    "        shared[\"summary\"][self.params['filename']] = exec_res\n",
+    "\n",
+    "summarize_file = SummarizeFile()\n",
+    "# For testing, we set params directly on the node.\n",
+    "# In real usage, you'd set them in a Flow or BatchFlow.\n",
+    "summarize_file.set_params({\"filename\":\"addiction.txt\"})\n",
+    "summarize_file.run(shared)\n",
+    "\n",
+    "# 5) If data is large, we can apply a map-reduce pattern:\n",
+    "#    - MapSummaries(BatchNode) => chunk the file and summarize each chunk\n",
+    "#    - ReduceSummaries(Node)   => combine those chunk-level summaries\n",
+    "class MapSummaries(BatchNode):\n",
+    "    def prep(self, shared):\n",
+    "        content = shared['data'][self.params['filename']]\n",
+    "        chunk_size = 10000\n",
+    "        chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]\n",
+    "        # Must return an iterable (list or generator) for a BatchNode.\n",
+    "        return chunks\n",
+    "    def exec(self, shared, prep_res):\n",
+    "        # Each iteration of prep_res corresponds to a single chunk.\n",
+    "        chunk = prep_res\n",
+    "        prompt = f\"{chunk} Respond a summary of above in 10 words\"\n",
+    "        summary = call_llm(prompt)\n",
+    "        return summary\n",
+    "    def post(self, shared, prep_res, exec_res):\n",
+    "        # exec_res is a list of exec() results (summaries for each chunk).\n",
+    "        combined_summary = [f\"{i}. {summary}\" for i, summary in enumerate(exec_res)]\n",
+    "        shared[\"summary\"][self.params['filename']] = combined_summary\n",
+    "\n",
+    "class ReduceSummaries(Node):\n",
+    "    def prep(self, shared):\n",
+    "        # Retrieve the list of chunk summaries from shared storage\n",
+    "        return shared[\"summary\"][self.params['filename']]\n",
+    "    def exec(self, shared, prep_res):\n",
+    "        combined_summary = prep_res\n",
+    "        prompt = f\"{combined_summary} Respond a summary of above in 10 words\"\n",
+    "        summary = call_llm(prompt)\n",
+    "        return summary\n",
+    "    def post(self, shared, prep_res, exec_res):\n",
+    "        # Store the combined summary as the final summary for this file.\n",
+    "        shared[\"summary\"][self.params['filename']] = exec_res\n",
+    "        \n",
+    "map_summaries = MapSummaries()\n",
+    "reduce_summaries = ReduceSummaries()\n",
+    "# Link map_summaries to reduce_summaries with an action\n",
+    "# By default, the action is \"default\" (when post returns None, it takes \"default\" action)\n",
+    "# This is the same as map_summaries - \"default\" >> reduce_summaries\n",
+    "map_summaries >> reduce_summaries\n",
+    "\n",
+    "# We don't directly call map_summaries.run(shared), \n",
+    "# because that alone would process only the map step without reduce.\n",
+    "\n",
+    "# 6) Instead, create a Flow that starts from map_summaries (a Node) \n",
+    "#    and automatically includes reduce_summaries. \n",
+    "#    Note: A Flow can also start from any other Flow or BatchFlow.\n",
+    "\n",
+    "\n",
+    "file_summary_flow = Flow(start=map_summaries)\n",
+    "# When a flow's params are set, they are recursively applied to all nodes in the flow\n",
+    "file_summary_flow.set_params({\"filename\":\"before.txt\"})\n",
+    "file_summary_flow.run(shared)\n",
+    "\n",
+    "# 7) Summarize all files using a BatchFlow that reruns file_summary_flow for each file\n",
+    "class SummarizeAllFiles(BatchFlow):\n",
+    "    def prep(self, shared):\n",
+    "        # Return a list of parameters to apply in each flow iteration.\n",
+    "        # Each individual param dict is merged with this node's own params,\n",
+    "        # allowing multi-level BatchFlow nesting.\n",
+    "        # E.g., first level directory, second level file.\n",
+    "        return [{\"filename\":filename} for filename in shared['data']]\n",
+    "\n",
+    "summarize_all_files = SummarizeAllFiles(start=file_summary_flow)\n",
+    "summarize_all_files.run(shared)\n",
+    "\n",
+    "\n",
+    "# 8) QA Agent: Find the most relevant file based on summary with actions\n",
+    "#    if no question is asked:\n",
+    "#       (a) end: terminate the flow \n",
+    "#    if question is asked:\n",
+    "#         if relevant file is found:\n",
+    "#            (b) answer: move to answer node and read the whole file to answer the question\n",
+    "#         if no relevant file is found:\n",
+    "#            (c) retry: retry the process to find the relevant file\n",
+    "class FindRelevantFile(Node):\n",
+    "    def prep(self, shared):\n",
+    "        question = input(\"Enter a question: \")\n",
+    "        formatted_list = [f\"- '{filename}': {shared['summary'][filename]}\" \n",
+    "                         for filename in shared['summary']]\n",
+    "        return question, formatted_list\n",
+    "    def exec(self, shared, prep_res):\n",
+    "        question, formatted_list = prep_res\n",
+    "        if not question:\n",
+    "            return {\"think\":\"no question\", \"has_relevant\":False}\n",
+    "        # Provide a structured YAML output that includes:\n",
+    "        # - The chain of thought\n",
+    "        # - Whether any relevant file was found\n",
+    "        # - The most relevant file if found\n",
+    "        prompt = f\"\"\"Question: {question} \n",
+    "Find the most relevant file from: \n",
+    "{formatted_list}\n",
+    "If no relevant file, explain why\n",
+    "Respond in yaml without additional information:\n",
+    "think: the question has/has no relevant file ...\n",
+    "has_relevant: true/false\n",
+    "most_relevant: filename\"\"\"\n",
+    "        response = call_llm(prompt)\n",
+    "        import yaml\n",
+    "        result = yaml.safe_load(response)\n",
+    "        # Ensure required fields are present\n",
+    "        assert \"think\" in result\n",
+    "        assert \"has_relevant\" in result\n",
+    "        assert \"most_relevant\" in result if result[\"has_relevant\"] else True\n",
+    "        return result\n",
+    "    # handle errors by returning a default response in case of exception after retries\n",
+    "    def process_after_fail(self,shared,prep_res,exc):\n",
+    "        # if not overridden, the default is to throw the exception\n",
+    "        return {\"think\":\"error finding the file\", \"has_relevant\":False}\n",
+    "    def post(self, shared, prep_res, exec_res):\n",
+    "        question, _ = prep_res\n",
+    "        # Decide what to do next based on the results\n",
+    "        if not question:\n",
+    "            print(f\"No question asked\")\n",
+    "            return \"end\"\n",
+    "        if exec_res[\"has_relevant\"]:\n",
+    "            # Store the question and most relevant file in shared\n",
+    "            shared[\"question\"] = question\n",
+    "            shared[\"relevant_file\"] = exec_res['most_relevant']\n",
+    "            print(f\"Relevant file found: {exec_res['most_relevant']}\")\n",
+    "            return \"answer\"\n",
+    "        else:\n",
+    "            print(f\"No relevant file found: {exec_res['think']}\")\n",
+    "            return \"retry\"\n",
+    "\n",
+    "class AnswerQuestion(Node):\n",
+    "    def prep(self, shared):\n",
+    "        question = shared['question']\n",
+    "        relevant_file = shared['relevant_file']\n",
+    "        # Read the whole file content\n",
+    "        file_content = shared['data'][relevant_file]\n",
+    "        return question, file_content\n",
+    "    def exec(self, shared, prep_res):\n",
+    "        question, file_content = prep_res\n",
+    "        prompt = f\"\"\"Question: {question}\n",
+    "File: {file_content}\n",
+    "Answer the question in 50 words\"\"\"\n",
+    "        response = call_llm(prompt)\n",
+    "        return response\n",
+    "    def post(self, shared, prep_res, exec_res):\n",
+    "        print(f\"Answer: {exec_res}\")\n",
+    "\n",
+    "class NoOp(Node):\n",
+    "    pass\n",
+    "\n",
+    "# Configure the QA agent with appropriate transitions and retries\n",
+    "find_relevant_file = FindRelevantFile(max_retries=3)\n",
+    "answer_question = AnswerQuestion()\n",
+    "no_op = NoOp()\n",
+    "\n",
+    "# Connect the nodes based on the actions they return\n",
+    "find_relevant_file - \"answer\" >> answer_question >> find_relevant_file\n",
+    "find_relevant_file - \"retry\" >> find_relevant_file\n",
+    "find_relevant_file - \"end\" >> no_op\n",
+    "\n",
+    "qa_agent = Flow(start=find_relevant_file)\n",
+    "qa_agent.run(shared)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/PaulGrahamEssaysLarge/addiction.txt b/data/PaulGrahamEssaysLarge/addiction.txt
new file mode 100644
index 0000000..ff11c9f
--- /dev/null
+++ b/data/PaulGrahamEssaysLarge/addiction.txt
@@ -0,0 +1,116 @@
+July 2010 What hard liquor, cigarettes, heroin, and crack have in common is
+that they're all more concentrated forms of less addictive predecessors.
+Most if not all the things we describe as addictive are.  And the
+scary thing is, the process that created them is accelerating. We wouldn't want to stop it.  It's the same process that cures
+diseases: technological progress.  Technological progress means
+making things do more of what we want.  When the thing we want is
+something we want to want, we consider technological progress good.
+If some new technique makes solar cells x% more efficient, that
+seems strictly better.  When progress concentrates something we
+don't want to want—when it transforms opium into heroin—it seems
+bad.  But it's the same process at work.
+[1] No one doubts this process is accelerating, which means increasing
+numbers of things we like will be transformed into things we like
+too much.
+[2] As far as I know there's no word for something we like too much.
+The closest is the colloquial sense of "addictive." That usage has
+become increasingly common during my lifetime.  And it's clear why:
+there are an increasing number of things we need it for.  At the
+extreme end of the spectrum are crack and meth.  Food has been
+transformed by a combination of factory farming and innovations in
+food processing into something with way more immediate bang for the
+buck, and you can see the results in any town in America.  Checkers
+and solitaire have been replaced by World of Warcraft and FarmVille.
+TV has become much more engaging, and even so it can't compete with Facebook. The world is more addictive than it was 40 years ago.   And unless
+the forms of technological progress that produced these things are
+subject to different laws than technological progress in general,
+the world will get more addictive in the next 40 years than it did
+in the last 40. The next 40 years will bring us some wonderful things.  I don't
+mean to imply they're all to be avoided.  Alcohol is a dangerous
+drug, but I'd rather live in a world with wine than one without.
+Most people can coexist with alcohol; but you have to be careful.
+More things we like will mean more things we have to be careful
+about. Most people won't, unfortunately.  Which means that as the world
+becomes more addictive, the two senses in which one can live a
+normal life will be driven ever further apart.  One sense of "normal"
+is statistically normal: what everyone else does.  The other is the
+sense we mean when we talk about the normal operating range of a
+piece of machinery: what works best. These two senses are already quite far apart.  Already someone
+trying to live well would seem eccentrically abstemious in most of
+the US.  That phenomenon is only going to become more pronounced.
+You can probably take it as a rule of thumb from now on that if
+people don't think you're weird, you're living badly. Societies eventually develop antibodies to addictive new things.
+I've seen that happen with cigarettes.  When cigarettes first
+appeared, they spread the way an infectious disease spreads through
+a previously isolated population.  Smoking rapidly became a
+(statistically) normal thing.  There were ashtrays everywhere.  We
+had ashtrays in our house when I was a kid, even though neither of
+my parents smoked.  You had to for guests. As knowledge spread about the dangers of smoking, customs changed.
+In the last 20 years, smoking has been transformed from something
+that seemed totally normal into a rather seedy habit: from something
+movie stars did in publicity shots to something small huddles of
+addicts do outside the doors of office buildings.  A lot of the
+change was due to legislation, of course, but the legislation
+couldn't have happened if customs hadn't already changed. It took a while though—on the order of 100 years.  And unless the
+rate at which social antibodies evolve can increase to match the
+accelerating rate at which technological progress throws off new
+addictions, we'll be increasingly unable to rely on customs to
+protect us.
+[3]
+Unless we want to be canaries in the coal mine
+of each new addiction—the people whose sad example becomes a
+lesson to future generations—we'll have to figure out for ourselves
+what to avoid and how.  It will actually become a reasonable strategy
+(or a more reasonable strategy) to suspect 
+everything new. In fact, even that won't be enough.  We'll have to worry not just
+about new things, but also about existing things becoming more
+addictive.  That's what bit me.  I've avoided most addictions, but
+the Internet got me because it became addictive while I was using
+it.
+[4] Most people I know have problems with Internet addiction.  We're
+all trying to figure out our own customs for getting free of it.
+That's why I don't have an iPhone, for example; the last thing I
+want is for the Internet to follow me out into the world.
+[5]
+My latest trick is taking long hikes.  I used to think running was a
+better form of exercise than hiking because it took less time.  Now
+the slowness of hiking seems an advantage, because the longer I
+spend on the trail, the longer I have to think without interruption. Sounds pretty eccentric, doesn't it?  It always will when you're
+trying to solve problems where there are no customs yet to guide
+you.  Maybe I can't plead Occam's razor; maybe I'm simply eccentric.
+But if I'm right about the acceleration of addictiveness, then this
+kind of lonely squirming to avoid it will increasingly be the fate
+of anyone who wants to get things done.  We'll increasingly be
+defined by what we say no to.
+Notes [1]
+Could you restrict technological progress to areas where you
+wanted it?  Only in a limited way, without becoming a police state.
+And even then your restrictions would have undesirable side effects.
+"Good" and "bad" technological progress aren't sharply differentiated,
+so you'd find you couldn't slow the latter without also slowing the
+former.  And in any case, as Prohibition and the "war on drugs"
+show, bans often do more harm than good. [2]
+Technology has always been accelerating.  By Paleolithic
+standards, technology evolved at a blistering pace in the Neolithic
+period. [3]
+Unless we mass produce social customs.  I suspect the recent
+resurgence of evangelical Christianity in the US is partly a reaction
+to drugs.  In desperation people reach for the sledgehammer; if
+their kids won't listen to them, maybe they'll listen to God.  But
+that solution has broader consequences than just getting kids to
+say no to drugs.  You end up saying no to 
+science as well.
+I worry we may be heading for a future in which only a few people
+plot their own itinerary through no-land, while everyone else books
+a package tour.  Or worse still, has one booked for them by the
+government. [4]
+People commonly use the word "procrastination" to describe
+what they do on the Internet.  It seems to me too mild to describe
+what's happening as merely not-doing-work.  We don't call it
+procrastination when someone gets drunk instead of working. [5]
+Several people have told me they like the iPad because it
+lets them bring the Internet into situations where a laptop would
+be too conspicuous.  In other words, it's a hip flask.  (This is
+true of the iPhone too, of course, but this advantage isn't as
+obvious because it reads as a phone, and everyone's used to those.) Thanks to Sam Altman, Patrick Collison, Jessica Livingston, and
+Robert Morris for reading drafts of this.
\ No newline at end of file
diff --git a/data/PaulGrahamEssaysLarge/aord.txt b/data/PaulGrahamEssaysLarge/aord.txt
new file mode 100644
index 0000000..fb6547a
--- /dev/null
+++ b/data/PaulGrahamEssaysLarge/aord.txt
@@ -0,0 +1,126 @@
+October 2015 When I talk to a startup that's been operating for more than 8 or
+9 months, the first thing I want to know is almost always the same.
+Assuming their expenses remain constant and their revenue growth
+is what it has been over the last several months, do they make it to
+profitability on the money they have left?  Or to put it more
+dramatically, by default do they live or die? The startling thing is how often the founders themselves don't know.
+Half the founders I talk to don't know whether they're default alive
+or default dead. If you're among that number, Trevor Blackwell has made a handy
+calculator you can use to find out. The reason I want to know first whether a startup is default alive
+or default dead is that the rest of the conversation depends on the
+answer.  If the company is default alive, we can talk about ambitious
+new things they could do.  If it's default dead, we probably need
+to talk about how to save it.  We know the current trajectory ends
+badly.  How can they get off that trajectory? Why do so few founders know whether they're default alive or default
+dead?  Mainly, I think, because they're not used to asking that.
+It's not a question that makes sense to ask early on, any more than
+it makes sense to ask a 3 year old how he plans to support
+himself.  But as the company grows older, the question switches from
+meaningless to critical.  That kind of switch often takes people
+by surprise. I propose the following solution: instead of starting to ask too
+late whether you're default alive or default dead, start asking too
+early.  It's hard to say precisely when the question switches
+polarity.  But it's probably not that dangerous to start worrying
+too early that you're default dead, whereas it's very dangerous to
+start worrying too late. The reason is a phenomenon I wrote about earlier: the
+fatal pinch.
+The fatal pinch is default dead + slow growth + not enough
+time to fix it.  And the way founders end up in it is by not realizing
+that's where they're headed. There is another reason founders don't ask themselves whether they're
+default alive or default dead: they assume it will be easy to raise
+more money.  But that assumption is often false, and worse still, the
+more you depend on it, the falser it becomes. Maybe it will help to separate facts from hopes. Instead of thinking
+of the future with vague optimism, explicitly separate the components.
+Say "We're default dead, but we're counting on investors to save
+us." Maybe as you say that, it will set off the same alarms in your
+head that it does in mine.  And if you set off the alarms sufficiently
+early, you may be able to avoid the fatal pinch. It would be safe to be default dead if you could count on investors
+saving you.  As a rule their interest is a function of
+growth.  If you have steep revenue growth, say over 5x a year, you
+can start to count on investors being interested even if you're not
+profitable.
+[1]
+But investors are so fickle that you can never
+do more than start to count on them.  Sometimes something about your
+business will spook investors even if your growth is great.  So no
+matter how good your growth is, you can never safely treat fundraising
+as more than a plan A. You should always have a plan B as well: you
+should know (as in write down) precisely what you'll need to do to
+survive if you can't raise more money, and precisely when you'll 
+have to switch to plan B if plan A isn't working. In any case, growing fast versus operating cheaply is far from the
+sharp dichotomy many founders assume it to be.  In practice there
+is surprisingly little connection between how much a startup spends
+and how fast it grows.  When a startup grows fast, it's usually
+because the product hits a nerve, in the sense of hitting some big
+need straight on.  When a startup spends a lot, it's usually because
+the product is expensive to develop or sell, or simply because
+they're wasteful. If you're paying attention, you'll be asking at this point not just
+how to avoid the fatal pinch, but how to avoid being default dead.
+That one is easy: don't hire too fast.  Hiring too fast is by far
+the biggest killer of startups that raise money.
+[2] Founders tell themselves they need to hire in order to grow.  But
+most err on the side of overestimating this need rather than
+underestimating it.  Why?  Partly because there's so much work to
+do.  Naive founders think that if they can just hire enough
+people, it will all get done.  Partly because successful startups have
+lots of employees, so it seems like that's what one does in order
+to be successful.  In fact the large staffs of successful startups
+are probably more the effect of growth than the cause.  And
+partly because when founders have slow growth they don't want to
+face what is usually the real reason: the product is not appealing
+enough. Plus founders who've just raised money are often encouraged to
+overhire by the VCs who funded them.  Kill-or-cure strategies are
+optimal for VCs because they're protected by the portfolio effect.
+VCs want to blow you up, in one sense of the phrase or the other.
+But as a founder your incentives are different.  You want above all
+to survive.
+[3] Here's a common way startups die.  They make something moderately
+appealing and have decent initial growth. They raise their first
+round fairly easily, because the founders seem smart and the idea
+sounds plausible. But because the product is only moderately
+appealing, growth is ok but not great.  The founders convince
+themselves that hiring a bunch of people is the way to boost growth.
+Their investors agree.  But (because the product is only moderately
+appealing) the growth never comes.  Now they're rapidly running out
+of runway.  They hope further investment will save them. But because
+they have high expenses and slow growth, they're now unappealing
+to investors. They're unable to raise more, and the company dies. What the company should have done is address the fundamental problem:
+that the product is only moderately appealing.  Hiring people is
+rarely the way to fix that.  More often than not it makes it harder.
+At this early stage, the product needs to evolve more than to be
+"built out," and that's usually easier with fewer people.
+[4] Asking whether you're default alive or default dead may save you
+from this.  Maybe the alarm bells it sets off will counteract the
+forces that push you to overhire.  Instead you'll be compelled to
+seek growth in other ways. For example, by doing
+things that don't scale, or by redesigning the product in the
+way only founders can.
+And for many if not most startups, these paths to growth will be
+the ones that actually work. Airbnb waited 4 months after raising money at the end of Y Combinator
+before they hired their first employee.  In the meantime the founders
+were terribly overworked.  But they were overworked evolving Airbnb
+into the astonishingly successful organism it is now. Notes [1]
+Steep usage growth will also interest investors.  Revenue
+will ultimately be a constant multiple of usage, so x% usage growth
+predicts x% revenue growth.  But in practice investors discount
+merely predicted revenue, so if you're measuring usage you need a
+higher growth rate to impress investors. [2]
+Startups that don't raise money are saved from hiring too
+fast because they can't afford to. But that doesn't mean you should
+avoid raising money in order to avoid this problem, any more than
+that total abstinence is the only way to avoid becoming an alcoholic. [3]
+I would not be surprised if VCs' tendency to push founders
+to overhire is not even in their own interest.  They don't know how
+many of the companies that get killed by overspending might have
+done well if they'd survived.  My guess is a significant number. [4]
+After reading a draft, Sam Altman wrote: "I think you should make the hiring point more strongly.  I think
+it's roughly correct to say that YC's most successful companies
+have never been the fastest to hire, and one of the marks of a great
+founder is being able to resist this urge." Paul Buchheit adds: "A related problem that I see a lot is premature scaling—founders
+take a small business that isn't really working (bad unit economics,
+typically) and then scale it up because they want impressive growth
+numbers. This is similar to over-hiring in that it makes the business
+much harder to fix once it's big, plus they are bleeding cash really
+fast."
+Thanks to Sam Altman, Paul Buchheit, Joe Gebbia, Jessica Livingston,
+and Geoff Ralston for reading drafts of this.
\ No newline at end of file
diff --git a/data/PaulGrahamEssaysLarge/apple.txt b/data/PaulGrahamEssaysLarge/apple.txt
new file mode 100644
index 0000000..7a498d0
--- /dev/null
+++ b/data/PaulGrahamEssaysLarge/apple.txt
@@ -0,0 +1,201 @@
+
+
+Want to start a startup?  Get funded by
+Y Combinator.
+
+
+
+
+November 2009 — I don't think Apple realizes how badly the App Store approval process
+is broken.  Or rather, I don't think they realize how much it matters
+that it's broken.  The way Apple runs the App Store has harmed their reputation with
+programmers more than anything else they've ever done. 
+Their reputation with programmers used to be great.
+It used to be the most common complaint you heard
+about Apple was that their fans admired them too uncritically.
+The App Store has changed that.  Now a lot of programmers
+have started to see Apple as evil.  How much of the goodwill Apple once had with programmers have they
+lost over the App Store?  A third?  Half?  And that's just so far.
+The App Store is an ongoing karma leak.  * * *  How did Apple get into this mess?  Their fundamental problem is
+that they don't understand software.  They treat iPhone apps the way they treat the music they sell through
+iTunes.  Apple is the channel; they own the user; if you want to
+reach users, you do it on their terms. The record labels agreed,
+reluctantly.  But this model doesn't work for software.  It doesn't
+work for an intermediary to own the user.  The software business
+learned that in the early 1980s, when companies like VisiCorp showed
+that although the words "software" and "publisher" fit together,
+the underlying concepts don't.  Software isn't like music or books.
+It's too complicated for a third party to act as an intermediary
+between developer and user.   And yet that's what Apple is trying
+to be with the App Store: a software publisher.  And a particularly
+overreaching one at that, with fussy tastes and a rigidly enforced
+house style.If software publishing didn't work in 1980, it works even less now
+that software development has evolved from a small number of big
+releases to a constant stream of small ones.  But Apple doesn't
+understand that either.  Their model of product development derives
+from hardware.  They work on something till they think it's finished,
+then they release it.  You have to do that with hardware, but because
+software is so easy to change, its design can benefit from evolution.
+The standard way to develop applications now is to launch fast and
+iterate.  Which means it's a disaster to have long, random delays
+each time you release a new version.Apparently Apple's attitude is that developers should be more careful
+when they submit a new version to the App Store.  They would say
+that.  But powerful as they are, they're not powerful enough to
+turn back the evolution of technology.  Programmers don't use
+launch-fast-and-iterate out of laziness.  They use it because it
+yields the best results.  By obstructing that process, Apple is
+making them do bad work, and programmers hate that as much as Apple
+would.How would Apple like it if when they discovered a serious bug in
+OS X, instead of releasing a software update immediately, they had
+to submit their code to an intermediary who sat on it for a month
+and then rejected it because it contained an icon they didn't like?By breaking software development, Apple gets the opposite of what
+they intended: the version of an app currently available in the App
+Store tends to be an old and buggy one.  One developer told me:
+
+  As a result of their process, the App Store is full of half-baked
+  applications. I make a new version almost every day that I release
+  to beta users. The version on the App Store feels old and crappy.
+  I'm sure that a lot of developers feel this way: One emotion is
+  "I'm not really proud about what's in the App Store", and it's
+  combined with the emotion "Really, it's Apple's fault."
+
+Another wrote:
+
+  I believe that they think their approval process helps users by
+  ensuring quality.  In reality, bugs like ours get through all the
+  time and then it can take 4-8 weeks to get that bug fix approved,
+  leaving users to think that iPhone apps sometimes just don't work.
+  Worse for Apple, these apps work just fine on other platforms
+  that have immediate approval processes.
+
+Actually I suppose Apple has a third misconception: that all the
+complaints about App Store approvals are not a serious problem.
+They must hear developers complaining.  But partners and suppliers
+are always complaining.  It would be a bad sign if they weren't;
+it would mean you were being too easy on them.  Meanwhile the iPhone
+is selling better than ever.  So why do they need to fix anything?They get away with maltreating developers, in the short term, because
+they make such great hardware.  I just bought a new 27" iMac a
+couple days ago.  It's fabulous.  The screen's too shiny, and the
+disk is surprisingly loud, but it's so beautiful that you can't
+make yourself care.So I bought it, but I bought it, for the first time, with misgivings.
+I felt the way I'd feel buying something made in a country with a
+bad human rights record.  That was new.  In the past when I bought
+things from Apple it was an unalloyed pleasure.  Oh boy!  They make
+such great stuff.  This time it felt like a Faustian bargain.  They
+make such great stuff, but they're such assholes.  Do I really want
+to support this company?  * * *  Should Apple care what people like me think?  What difference does
+it make if they alienate a small minority of their users?There are a couple reasons they should care.  One is that these
+users are the people they want as employees.  If your company seems
+evil, the best programmers won't work for you.  That hurt Microsoft
+a lot starting in the 90s.  Programmers started to feel sheepish
+about working there.  It seemed like selling out.  When people from
+Microsoft were talking to other programmers and they mentioned where
+they worked, there were a lot of self-deprecating jokes about having
+gone over to the dark side.  But the real problem for Microsoft
+wasn't the embarrassment of the people they hired.  It was the
+people they never got.  And you know who got them?  Google and
+Apple.  If Microsoft was the Empire, they were the Rebel Alliance.
+And it's largely because they got more of the best people that
+Google and Apple are doing so much better than Microsoft today.Why are programmers so fussy about their employers' morals?  Partly
+because they can afford to be.  The best programmers can work
+wherever they want.  They don't have to work for a company they
+have qualms about.But the other reason programmers are fussy, I think, is that evil
+begets stupidity.  An organization that wins by exercising power
+starts to lose the ability to win by doing better work.  And it's
+not fun for a smart person to work in a place where the best ideas
+aren't the ones that win.  I think the reason Google embraced "Don't
+be evil" so eagerly was not so much to impress the outside world
+as to inoculate themselves against arrogance.
+[1]That has worked for Google so far.  They've become more
+bureaucratic, but otherwise they seem to have held true to their
+original principles. With Apple that seems less the case.  When you
+look at the famous 
+1984 ad 
+now, it's easier to imagine Apple as the
+dictator on the screen than the woman with the hammer.
+[2]
+In fact, if you read the dictator's speech it sounds uncannily like a
+prophecy of the App Store.
+
+  We have triumphed over the unprincipled dissemination of facts.  We have created, for the first time in all history, a garden of
+  pure ideology, where each worker may bloom secure from the pests
+  of contradictory and confusing truths.
+
+The other reason Apple should care what programmers think of them
+is that when you sell a platform, developers make or break you.  If
+anyone should know this, Apple should.  VisiCalc made the Apple II.And programmers build applications for the platforms they use.  Most
+applications—most startups, probably—grow out of personal projects.
+Apple itself did.  Apple made microcomputers because that's what
+Steve Wozniak wanted for himself.  He couldn't have afforded a
+minicomputer. 
+[3]
+ Microsoft likewise started out making interpreters
+for little microcomputers because
+Bill Gates and Paul Allen were interested in using them.  It's a
+rare startup that doesn't build something the founders use.The main reason there are so many iPhone apps is that so many programmers
+have iPhones.  They may know, because they read it in an article,
+that Blackberry has such and such market share.  But in practice
+it's as if RIM didn't exist. If they're going to build something,
+they want to be able to use it themselves, and that means building
+an iPhone app.So programmers continue to develop iPhone apps, even though Apple
+continues to maltreat them.  They're like someone stuck in an abusive
+relationship.  They're so attracted to the iPhone that they can't
+leave.  But they're looking for a way out.  One wrote:
+
+  While I did enjoy developing for the iPhone, the control they
+  place on the App Store does not give me the drive to develop
+  applications as I would like. In fact I don't intend to make any
+  more iPhone applications unless absolutely necessary.
+[4]
+
+Can anything break this cycle?  No device I've seen so far could.
+Palm and RIM haven't a hope.  The only credible contender is Android.
+But Android is an orphan; Google doesn't really care about it, not
+the way Apple cares about the iPhone.  Apple cares about the iPhone
+the way Google cares about search.  * * *  Is the future of handheld devices one locked down by Apple?  It's
+a worrying prospect.  It would be a bummer to have another grim
+monoculture like we had in the 1990s.  In 1995, writing software
+for end users was effectively identical with writing Windows
+applications.  Our horror at that prospect was the single biggest
+thing that drove us to start building web apps.At least we know now what it would take to break Apple's lock.
+You'd have to get iPhones out of programmers' hands.  If programmers
+used some other device for mobile web access, they'd start to develop
+apps for that instead.How could you make a device programmers liked better than the iPhone?
+It's unlikely you could make something better designed.  Apple
+leaves no room there.  So this alternative device probably couldn't
+win on general appeal.  It would have to win by virtue of some
+appeal it had to programmers specifically.One way to appeal to programmers is with software.  If you
+could think of an application programmers had to have, but that
+would be impossible in the circumscribed world of the iPhone, 
+you could presumably get them to switch.That would definitely happen if programmers started to use handhelds
+as development machines—if handhelds displaced laptops the
+way laptops displaced desktops.  You need more control of a development
+machine than Apple will let you have over an iPhone.Could anyone make a device that you'd carry around in your pocket
+like a phone, and yet would also work as a development machine?
+It's hard to imagine what it would look like.  But I've learned
+never to say never about technology.  A phone-sized device that
+would work as a development machine is no more miraculous by present
+standards than the iPhone itself would have seemed by the standards
+of 1995.My current development machine is a MacBook Air, which I use with
+an external monitor and keyboard in my office, and by itself when
+traveling.  If there was a version half the size I'd prefer it.
+That still wouldn't be small enough to carry around everywhere like
+a phone, but we're within a factor of 4 or so.  Surely that gap is
+bridgeable.  In fact, let's make it an
+RFS. Wanted: 
+Woman with hammer.  Notes [1]
+When Google adopted "Don't be evil," they were still so small
+that no one would have expected them to be, yet.
+[2]
+The dictator in the 1984 ad isn't Microsoft, incidentally;
+it's IBM.  IBM seemed a lot more frightening in those days, but
+they were friendlier to developers than Apple is now.  [3]
+He couldn't even afford a monitor.  That's why the Apple
+I used a TV as a monitor.  [4]
+Several people I talked to mentioned how much they liked the
+iPhone SDK.  The problem is not Apple's products but their policies.
+Fortunately policies are software; Apple can change them instantly
+if they want to.  Handy that, isn't it?  Thanks to Sam Altman, Trevor Blackwell, Ross Boucher, 
+James Bracy, Gabor Cselle,
+Patrick Collison, Jason Freedman, John Gruber, Joe Hewitt, Jessica Livingston,
+Robert Morris, Teng Siong Ong, Nikhil Pandit, Savraj Singh, and Jared Tame for reading drafts of this.
\ No newline at end of file
diff --git a/data/PaulGrahamEssaysLarge/avg.txt b/data/PaulGrahamEssaysLarge/avg.txt
new file mode 100644
index 0000000..a3ec04d
--- /dev/null
+++ b/data/PaulGrahamEssaysLarge/avg.txt
@@ -0,0 +1,375 @@
+
+
+Want to start a startup?  Get funded by
+Y Combinator.
+
+
+
+
+April 2001, rev. April 2003 (This article is derived from a talk given at the 2001 Franz
+Developer Symposium.)
+In the summer of 1995, my friend Robert Morris and I
+started a startup called 
+Viaweb.  
+Our plan was to write
+software that would let end users build online stores.
+What was novel about this software, at the time, was
+that it ran on our server, using ordinary Web pages
+as the interface.A lot of people could have been having this idea at the
+same time, of course, but as far as I know, Viaweb was
+the first Web-based application.  It seemed such
+a novel idea to us that we named the company after it:
+Viaweb, because our software worked via the Web,
+instead of running on your desktop computer.Another unusual thing about this software was that it
+was written primarily in a programming language called
+Lisp. It was one of the first big end-user
+applications to be written in Lisp, which up till then
+had been used mostly in universities and research labs. [1]  The Secret Weapon.  Eric Raymond has written an essay called "How to Become a Hacker,"
+and in it, among other things, he tells would-be hackers what
+languages they should learn.  He suggests starting with Python and
+Java, because they are easy to learn.  The serious hacker will also
+want to learn C, in order to hack Unix, and Perl for system
+administration and cgi scripts.  Finally, the truly serious hacker
+should consider learning Lisp:
+
+  Lisp is worth learning for the profound enlightenment experience
+  you will have when you finally get it; that experience will make
+  you a better programmer for the rest of your days, even if you
+  never actually use Lisp itself a lot.
+
+This is the same argument you tend to hear for learning Latin.  It
+won't get you a job, except perhaps as a classics professor, but
+it will improve your mind, and make you a better writer in languages
+you do want to use, like English.But wait a minute.  This metaphor doesn't stretch that far.  The
+reason Latin won't get you a job is that no one speaks it.  If you
+write in Latin, no one can understand you.  But Lisp is a computer
+language, and computers speak whatever language you, the programmer,
+tell them to.So if Lisp makes you a better programmer, like he says, why wouldn't
+you want to use it? If a painter were offered a brush that would
+make him a better painter, it seems to me that he would want to
+use it in all his paintings, wouldn't he? I'm not trying to make
+fun of Eric Raymond here.  On the whole, his advice is good.  What
+he says about Lisp is pretty much the conventional wisdom.  But
+there is a contradiction in the conventional wisdom:  Lisp will
+make you a better programmer, and yet you won't use it.Why not?  Programming languages are just tools, after all.  If Lisp
+really does yield better programs, you should use it.  And if it
+doesn't, then who needs it?This is not just a theoretical question.  Software is a very
+competitive business, prone to natural monopolies.  A company that
+gets software written faster and better will, all other things
+being equal, put its competitors out of business.  And when you're
+starting a startup, you feel this very keenly.  Startups tend to
+be an all or nothing proposition.  You either get rich, or you get
+nothing.  In a startup, if you bet on the wrong technology, your
+competitors will crush you.Robert and I both knew Lisp well, and we couldn't see any reason
+not to trust our instincts and go with Lisp.  We knew that everyone
+else was writing their software in C++ or Perl.  But we also knew
+that that didn't mean anything.  If you chose technology that way,
+you'd be running Windows.  When you choose technology, you have to
+ignore what other people are doing, and consider only what will
+work the best.This is especially true in a startup.  In a big company, you can
+do what all the other big companies are doing.  But a startup can't
+do what all the other startups do.  I don't think a lot of people
+realize this, even in startups.The average big company grows at about ten percent a year.  So if
+you're running a big company and you do everything the way the
+average big company does it, you can expect to do as well as the
+average big company-- that is, to grow about ten percent a year.The same thing will happen if you're running a startup, of course.
+If you do everything the way the average startup does it, you should
+expect average performance.  The problem here is, average performance
+means that you'll go out of business.  The survival rate for startups
+is way less than fifty percent.  So if you're running a startup,
+you had better be doing something odd.  If not, you're in trouble.Back in 1995, we knew something that I don't think our competitors
+understood, and few understand even now:  when you're writing
+software that only has to run on your own servers, you can use
+any language you want.  When you're writing desktop software,
+there's a strong bias toward writing applications in the same
+language as the operating system.  Ten years ago, writing applications
+meant writing applications in C.  But with Web-based software,
+especially when you have the source code of both the language and
+the operating system, you can use whatever language you want.This new freedom is a double-edged sword, however.  Now that you
+can use any language, you have to think about which one to use.
+Companies that try to pretend nothing has changed risk finding that
+their competitors do not.If you can use any language, which do you use?  We chose Lisp.
+For one thing, it was obvious that rapid development would be
+important in this market.  We were all starting from scratch, so
+a company that could get new features done before its competitors
+would have a big advantage.  We knew Lisp was a really good language
+for writing software quickly, and server-based applications magnify
+the effect of rapid development, because you can release software
+the minute it's done.If other companies didn't want to use Lisp, so much the better.
+It might give us a technological edge, and we needed all the help
+we could get.  When we started Viaweb, we had no experience in
+business.  We didn't know anything about marketing, or hiring
+people, or raising money, or getting customers.  Neither of us had
+ever even had what you would call a real job.  The only thing we
+were good at was writing software.  We hoped that would save us.
+Any advantage we could get in the software department, we would
+take.So you could say that using Lisp was an experiment.  Our hypothesis
+was that if we wrote our software in Lisp, we'd be able to get
+features done faster than our competitors, and also to do things
+in our software that they couldn't do.  And because Lisp was so
+high-level, we wouldn't need a big development team, so our costs
+would be lower.  If this were so, we could offer a better product
+for less money, and still make a profit.  We would end up getting
+all the users, and our competitors would get none, and eventually
+go out of business.  That was what we hoped would happen, anyway.What were the results of this experiment?  Somewhat surprisingly,
+it worked.  We eventually had many competitors, on the order of
+twenty to thirty of them, but none of their software could compete
+with ours.  We had a wysiwyg online store builder that ran on the
+server and yet felt like a desktop application.  Our competitors
+had cgi scripts.  And we were always far ahead of them in features.
+Sometimes, in desperation, competitors would try to introduce
+features that we didn't have.  But with Lisp our development cycle
+was so fast that we could sometimes duplicate a new feature within
+a day or two of a competitor announcing it in a press release.  By
+the time journalists covering the press release got round to calling
+us, we would have the new feature too.It must have seemed to our competitors that we had some kind of
+secret weapon-- that we were decoding their Enigma traffic or
+something.  In fact we did have a secret weapon, but it was simpler
+than they realized.  No one was leaking news of their features to
+us.   We were just able to develop software faster than anyone
+thought possible.When I was about nine I happened to get hold of a copy of The Day
+of the Jackal, by Frederick Forsyth.  The main character is an
+assassin who is hired to kill the president of France.  The assassin
+has to get past the police to get up to an apartment that overlooks
+the president's route.  He walks right by them, dressed up as an
+old man on crutches, and they never suspect him.Our secret weapon was similar.  We wrote our software in a weird
+AI language, with a bizarre syntax full of parentheses.  For years
+it had annoyed me to hear Lisp described that way.  But now it
+worked to our advantage.  In business, there is nothing more valuable
+than a technical advantage your competitors don't understand.  In
+business, as in war, surprise is worth as much as force.And so, I'm a little embarrassed to say, I never said anything
+publicly about Lisp while we were working on Viaweb.  We never
+mentioned it to the press, and if you searched for Lisp on our Web
+site, all you'd find were the titles of two books in my bio.  This
+was no accident.  A startup should give its competitors as little
+information as possible.  If they didn't know what language our
+software was written in, or didn't care, I wanted to keep it that
+way. [2]  The people who understood our technology best were the customers.
+They didn't care what language Viaweb was written in either, but
+they noticed that it worked really well.  It let them build great
+looking online stores literally in minutes.  And so, by word of
+mouth mostly, we got more and more users.  By the end of 1996 we
+had about 70 stores online.  At the end of 1997 we had 500.  Six
+months later, when Yahoo bought us, we had 1070 users.  Today, as
+Yahoo Store, this software continues to dominate its market.  It's
+one of the more profitable pieces of Yahoo, and the stores built
+with it are the foundation of Yahoo Shopping.  I left Yahoo in
+1999, so I don't know exactly how many users they have now, but
+the last I heard there were about 20,000.
+The Blub Paradox.  What's so great about Lisp?  And if Lisp is so great, why doesn't
+everyone use it?  These sound like rhetorical questions, but actually
+they have straightforward answers.  Lisp is so great not because
+of some magic quality visible only to devotees, but because it is
+simply the most powerful language available.  And the reason everyone
+doesn't use it is that programming languages are not merely
+technologies, but habits of mind as well, and nothing changes
+slower.  Of course, both these answers need explaining.I'll begin with a shockingly controversial statement:  programming
+languages vary in power.Few would dispute, at least, that high level languages are more
+powerful than machine language.  Most programmers today would agree
+that you do not, ordinarily, want to program in machine language.
+Instead, you should program in a high-level language, and have a
+compiler translate it into machine language for you.  This idea is
+even built into the hardware now: since the 1980s, instruction sets
+have been designed for compilers rather than human programmers.Everyone knows it's a mistake to write your whole program by hand
+in machine language.  What's less often understood is that there
+is a more general principle here: that if you have a choice of
+several languages, it is, all other things being equal, a mistake
+to program in anything but the most powerful one. [3]  There are many exceptions to this rule.  If you're writing a program
+that has to work very closely with a program written in a certain
+language, it might be a good idea to write the new program in the
+same language.  If you're writing a program that only has to do
+something very simple, like number crunching or bit manipulation,
+you may as well use a less abstract language, especially since it
+may be slightly faster.  And if you're writing a short, throwaway
+program, you may be better off just using whatever language has
+the best library functions for the task.  But in general, for
+application software, you want to be using the most powerful
+(reasonably efficient) language you can get, and using anything
+else is a mistake, of exactly the same kind, though possibly in a
+lesser degree, as programming in machine language.You can see that machine language is very low level.  But, at least
+as a kind of social convention, high-level languages are often all
+treated as equivalent.  They're not.  Technically the term "high-level
+language" doesn't mean anything very definite.  There's no dividing
+line with machine languages on one side and all the high-level
+languages on the other.  Languages fall along a continuum [4] of
+abstractness, from the most powerful all the way down to machine
+languages, which themselves vary in power.Consider Cobol.  Cobol is a high-level language, in the sense that
+it gets compiled into machine language.  Would anyone seriously
+argue that Cobol is equivalent in power to, say, Python?  It's
+probably closer to machine language than Python.Or how about Perl 4?  Between Perl 4 and Perl 5, lexical closures
+got added to the language.  Most Perl hackers would agree that Perl
+5 is more powerful than Perl 4.  But once you've admitted that,
+you've admitted that one high level language can be more powerful
+than another.  And it follows inexorably that, except in special
+cases, you ought to use the most powerful you can get.This idea is rarely followed to its conclusion, though.  After a
+certain age, programmers rarely switch languages voluntarily.
+Whatever language people happen to be used to, they tend to consider
+just good enough.Programmers get very attached to their favorite languages, and I
+don't want to hurt anyone's feelings, so to explain this point I'm
+going to use a hypothetical language called Blub.  Blub falls right
+in the middle of the abstractness continuum.  It is not the most
+powerful language, but it is more powerful than Cobol or machine
+language.And in fact, our hypothetical Blub programmer wouldn't use either
+of them.  Of course he wouldn't program in machine language.  That's
+what compilers are for.  And as for Cobol, he doesn't know how
+anyone can get anything done with it.  It doesn't even have x (Blub
+feature of your choice).As long as our hypothetical Blub programmer is looking down the
+power continuum, he knows he's looking down.  Languages less powerful
+than Blub are obviously less powerful, because they're missing some
+feature he's used to.  But when our hypothetical Blub programmer
+looks in the other direction, up the power continuum, he doesn't
+realize he's looking up.  What he sees are merely weird languages.
+He probably considers them about equivalent in power to Blub, but
+with all this other hairy stuff thrown in as well.  Blub is good
+enough for him, because he thinks in Blub.When we switch to the point of view of a programmer using any of
+the languages higher up the power continuum, however, we find that
+he in turn looks down upon Blub.  How can you get anything done in
+Blub? It doesn't even have y.By induction, the only programmers in a position to see all the
+differences in power between the various languages are those who
+understand the most powerful one.  (This is probably what Eric
+Raymond meant about Lisp making you a better programmer.) You can't
+trust the opinions of the others, because of the Blub paradox:
+they're satisfied with whatever language they happen to use, because
+it dictates the way they think about programs.I know this from my own experience, as a high school kid writing
+programs in Basic.  That language didn't even support recursion.
+It's hard to imagine writing programs without using recursion, but
+I didn't miss it at the time.  I thought in Basic.  And I was a
+whiz at it.  Master of all I surveyed.The five languages that Eric Raymond recommends to hackers fall at
+various points on the power continuum.  Where they fall relative
+to one another is a sensitive topic.  What I will say is that I
+think Lisp is at the top.  And to support this claim I'll tell you
+about one of the things I find missing when I look at the other
+four languages.  How can you get anything done in them, I think,
+without macros? [5]  Many languages have something called a macro.  But Lisp macros are
+unique.  And believe it or not, what they do is related to the
+parentheses.  The designers of Lisp didn't put all those parentheses
+in the language just to be different.  To the Blub programmer, Lisp
+code looks weird.  But those parentheses are there for a reason.
+They are the outward evidence of a fundamental difference between
+Lisp and other languages.Lisp code is made out of Lisp data objects.  And not in the trivial
+sense that the source files contain characters, and strings are
+one of the data types supported by the language.  Lisp code, after
+it's read by the parser, is made of data structures that you can
+traverse.If you understand how compilers work, what's really going on is
+not so much that Lisp has a strange syntax as that Lisp has no
+syntax.  You write programs in the parse trees that get generated
+within the compiler when other languages are parsed.  But these
+parse trees are fully accessible to your programs.  You can write
+programs that manipulate them.  In Lisp, these programs are called
+macros.  They are programs that write programs.Programs that write programs?  When would you ever want to do that?
+Not very often, if you think in Cobol.  All the time, if you think
+in Lisp.  It would be convenient here if I could give an example
+of a powerful macro, and say there! how about that?  But if I did,
+it would just look like gibberish to someone who didn't know Lisp;
+there isn't room here to explain everything you'd need to know to
+understand what it meant.  In 
+Ansi Common Lisp I tried to move
+things along as fast as I could, and even so I didn't get to macros
+until page 160.But I think I can give a kind of argument that might be convincing.
+The source code of the Viaweb editor was probably about 20-25%
+macros.  Macros are harder to write than ordinary Lisp functions,
+and it's considered to be bad style to use them when they're not
+necessary.  So every macro in that code is there because it has to
+be.  What that means is that at least 20-25% of the code in this
+program is doing things that you can't easily do in any other
+language.  However skeptical the Blub programmer might be about my
+claims for the mysterious powers of Lisp, this ought to make him
+curious.  We weren't writing this code for our own amusement.  We
+were a tiny startup, programming as hard as we could in order to
+put technical barriers between us and our competitors.A suspicious person might begin to wonder if there was some
+correlation here.  A big chunk of our code was doing things that
+are very hard to do in other languages.  The resulting software
+did things our competitors' software couldn't do.  Maybe there was
+some kind of connection.  I encourage you to follow that thread.
+There may be more to that old man hobbling along on his crutches
+than meets the eye.
+Aikido for Startups
+But I don't expect to convince anyone (over 25) to go out and learn
+Lisp.  The purpose of this article is not to change anyone's mind,
+but to reassure people already interested in using Lisp-- people
+who know that Lisp is a powerful language, but worry because it
+isn't widely used.  In a competitive situation, that's an advantage.
+Lisp's power is multiplied by the fact that your competitors don't
+get it.If you think of using Lisp in a startup, you shouldn't worry that
+it isn't widely understood.  You should hope that it stays that
+way. And it's likely to.  It's the nature of programming languages
+to make most people satisfied with whatever they currently use.
+Computer hardware changes so much faster than personal habits that
+programming practice is usually ten to twenty years behind the
+processor.  At places like MIT they were writing programs in
+high-level languages in the early 1960s, but many companies continued
+to write code in machine language well into the 1980s.  I bet a
+lot of people continued to write machine language until the processor,
+like a bartender eager to close up and go home, finally kicked them
+out by switching to a risc instruction set.Ordinarily technology changes fast.  But programming languages are
+different: programming languages are not just technology, but what
+programmers think in.  They're half technology and half religion.[6]
+And so the median language, meaning whatever language the median
+programmer uses, moves as slow as an iceberg.  Garbage collection,
+introduced by Lisp in about 1960, is now widely considered to be
+a good thing.  Runtime typing, ditto, is growing in popularity.
+Lexical closures, introduced by Lisp in the early 1970s, are now,
+just barely, on the radar screen.  Macros, introduced by Lisp in the
+mid 1960s, are still terra incognita.Obviously, the median language has enormous momentum.  I'm not
+proposing that you can fight this powerful force.  What I'm proposing
+is exactly the opposite: that, like a practitioner of Aikido, you
+can use it against your opponents.If you work for a big company, this may not be easy.  You will have
+a hard time convincing the pointy-haired boss to let you build
+things in Lisp, when he has just read in the paper that some other
+language is poised, like Ada was twenty years ago, to take over
+the world.  But if you work for a startup that doesn't have
+pointy-haired bosses yet, you can, like we did, turn the Blub
+paradox to your advantage:  you can use technology that your
+competitors, glued immovably to the median language, will never be
+able to match.If you ever do find yourself working for a startup, here's a handy
+tip for evaluating competitors.  Read their job listings.  Everything
+else on their site may be stock photos or the prose equivalent,
+but the job listings have to be specific about what they want, or
+they'll get the wrong candidates.During the years we worked on Viaweb I read a lot of job descriptions.
+A new competitor seemed to emerge out of the woodwork every month
+or so.  The first thing I would do, after checking to see if they
+had a live online demo, was look at their job listings.  After a
+couple years of this I could tell which companies to worry about
+and which not to.  The more of an IT flavor the job descriptions
+had, the less dangerous the company was.  The safest kind were the
+ones that wanted Oracle experience.  You never had to worry about
+those.  You were also safe if they said they wanted C++ or Java
+developers.  If they wanted Perl or Python programmers, that would
+be a bit frightening-- that's starting to sound like a company
+where the technical side, at least, is run by real hackers.  If I
+had ever seen a job posting looking for Lisp hackers, I would have
+been really worried.
+Notes
+[1] Viaweb at first had two parts: the editor, written in Lisp, which people used to build their sites, and the ordering system,
+written in C, which handled orders.  The first version was mostly
+Lisp, because the ordering system was small.  Later we added two
+more modules, an image generator written in C, and a back-office
+manager written mostly in Perl.In January 2003, Yahoo released a new version of the editor 
+written in C++ and Perl.  It's hard to say whether the program is no
+longer written in Lisp, though, because to translate this program
+into C++ they literally had to write a Lisp interpreter: the source
+files of all the page-generating templates are still, as far as I
+know,  Lisp code.  (See Greenspun's Tenth Rule.)[2] Robert Morris says that I didn't need to be secretive, because
+even if our competitors had known we were using Lisp, they wouldn't
+have understood why:  "If they were that smart they'd already be
+programming in Lisp."[3] All languages are equally powerful in the sense of being Turing
+equivalent, but that's not the sense of the word programmers care
+about. (No one wants to program a Turing machine.)  The kind of
+power programmers care about may not be formally definable, but
+one way to explain it would be to say that it refers to features
+you could only get in the less powerful language by writing an
+interpreter for the more powerful language in it. If language A
+has an operator for removing spaces from strings and language B
+doesn't, that probably doesn't make A more powerful, because you
+can probably write a subroutine to do it in B.  But if A supports,
+say, recursion, and B doesn't, that's not likely to be something
+you can fix by writing library functions.[4] Note to nerds: or possibly a lattice, narrowing toward the top;
+it's not the shape that matters here but the idea that there is at
+least a partial order.[5] It is a bit misleading to treat macros as a separate feature.
+In practice their usefulness is greatly enhanced by other Lisp
+features like lexical closures and rest parameters.[6] As a result, comparisons of programming languages either take
+the form of religious wars or undergraduate textbooks so determinedly
+neutral that they're really works of anthropology.  People who
+value their peace, or want tenure, avoid the topic.  But the question
+is only half a religious one; there is something there worth
+studying, especially if you want to design new languages.
\ No newline at end of file
diff --git a/data/PaulGrahamEssaysLarge/before.txt b/data/PaulGrahamEssaysLarge/before.txt
new file mode 100644
index 0000000..9d0e393
--- /dev/null
+++ b/data/PaulGrahamEssaysLarge/before.txt
@@ -0,0 +1,387 @@
+
+
+Want to start a startup?  Get funded by
+Y Combinator.
+
+
+
+
+October 2014
+(This essay is derived from a guest lecture in Sam Altman's startup class at Stanford.  It's intended for college students, but much of it is applicable to potential founders at other ages.)
+One of the advantages of having kids is that when you have to give
+advice, you can ask yourself "what would I tell my own kids?"  My
+kids are little, but I can imagine what I'd tell them about startups
+if they were in college, and that's what I'm going to tell you.Startups are very counterintuitive.  I'm not sure why.  Maybe it's
+just because knowledge about them hasn't permeated our culture yet.
+But whatever the reason, starting a startup is a task where you
+can't always trust your instincts.It's like skiing in that way.  When you first try skiing and you
+want to slow down, your instinct is to lean back.  But if you lean
+back on skis you fly down the hill out of control.  So part of
+learning to ski is learning to suppress that impulse.  Eventually
+you get new habits, but at first it takes a conscious effort.  At
+first there's a list of things you're trying to remember as you
+start down the hill.Startups are as unnatural as skiing, so there's a similar list for
+startups. Here I'm going to give you the first part of it — the things to remember if you want to prepare yourself to start a startup.
+Counterintuitive
+The first item on it is the fact I already mentioned: that startups
+are so weird that if you trust your instincts, you'll make a lot
+of mistakes.  If you know nothing more than this, you may at least
+pause before making them.When I was running Y Combinator I used to joke that our function
+was to tell founders things they would ignore.  It's really true.
+Batch after batch, the YC partners warn founders about mistakes
+they're about to make, and the founders ignore them, and then come
+back a year later and say "I wish we'd listened."Why do the founders ignore the partners' advice?  Well, that's the
+thing about counterintuitive ideas: they contradict your intuitions.
+They seem wrong.  So of course your first impulse is to disregard
+them.  And in fact my joking description is not merely the curse
+of Y Combinator but part of its raison d'etre. If founders' instincts
+already gave them the right answers, they wouldn't need us.  You
+only need other people to give you advice that surprises you. That's
+why there are a lot of ski instructors and not many running
+instructors.
+[1]You can, however, trust your instincts about people.  And in fact
+one of the most common mistakes young founders make is not to
+do that enough.  They get involved with people who seem impressive,
+but about whom they feel some misgivings personally.  Later when
+things blow up they say "I knew there was something off about him,
+but I ignored it because he seemed so impressive."If you're thinking about getting involved with someone — as a
+cofounder, an employee, an investor, or an acquirer — and you
+have misgivings about them, trust your gut.  If someone seems
+slippery, or bogus, or a jerk, don't ignore it. This is one case where it pays to be self-indulgent. Work with people you genuinely like, and you've known long enough to be sure.
+Expertise
+The second counterintuitive point is that it's not that important
+to know a lot about startups.  The way to succeed in a startup is
+not to be an expert on startups, but to be an expert on your users
+and the problem you're solving for them.
+Mark Zuckerberg didn't succeed because he was an expert on startups.
+He succeeded despite being a complete noob at startups, because he
+understood his users really well.If you don't know anything about, say, how to raise an angel round,
+don't feel bad on that account.  That sort of thing you can learn
+when you need to, and forget after you've done it.In fact, I worry it's not merely unnecessary to learn in great
+detail about the mechanics of startups, but possibly somewhat
+dangerous.  If I met an undergrad who knew all about convertible
+notes and employee agreements and (God forbid) class FF stock, I
+wouldn't think "here is someone who is way ahead of their peers."
+It would set off alarms.  Because another of the characteristic
+mistakes of young founders is to go through the motions of starting
+a startup.  They make up some plausible-sounding idea, raise money
+at a good valuation, rent a cool office, hire a bunch of people.
+From the outside that seems like what startups do.  But the next
+step after rent a cool office and hire a bunch of people is: gradually
+realize how completely fucked they are, because while imitating all
+the outward forms of a startup they have neglected the one thing that's actually essential: making something people want.
+Game
+We saw this happen so often that we made up a name for it: playing
+house.  Eventually I realized why it was happening.  The reason
+young founders go through the motions of starting a startup is
+because that's what they've been trained to do for their whole lives
+up to that point.  Think about what you have to do to get into
+college, for example.  Extracurricular activities, check.  Even in
+college classes most of the work is as artificial as running laps.I'm not attacking the educational system for being this way. There
+will always be a certain amount of fakeness in the work you do when
+you're being taught something, and if you measure their performance
+it's inevitable that people will exploit the difference to the point
+where much of what you're measuring is artifacts of the fakeness.I confess I did it myself in college. I found that in a lot of
+classes there might only be 20 or 30 ideas that were the right shape
+to make good exam questions.  The way I studied for exams in these
+classes was not (except incidentally) to master the material taught
+in the class, but to make a list of potential exam questions and
+work out the answers in advance. When I walked into the final, the
+main thing I'd be feeling was curiosity about which of my questions
+would turn up on the exam.  It was like a game.It's not surprising that after being trained for their whole lives
+to play such games, young founders' first impulse on starting a
+startup is to try to figure out the tricks for winning at this new
+game. Since fundraising appears to be the measure of success for
+startups (another classic noob mistake), they always want to know what the
+tricks are for convincing investors.  We tell them the best way to
+convince investors is to make a startup
+that's actually doing well, meaning growing fast, and then simply
+tell investors so.  Then they want to know what the tricks are for
+growing fast.  And we have to tell them the best way to do that is
+simply to make something people want.So many of the conversations YC partners have with young founders
+begin with the founder asking "How do we..." and the partner replying
+"Just..."Why do the founders always make things so complicated?  The reason,
+I realized, is that they're looking for the trick.So this is the third counterintuitive thing to remember about
+startups: starting a startup is where gaming the system stops
+working.  Gaming the system may continue to work if you go to work
+for a big company. Depending on how broken the company is, you can
+succeed by sucking up to the right people, giving the impression
+of productivity, and so on. 
+[2]
+But that doesn't work with startups.
+There is no boss to trick, only users, and all users care about is
+whether your product does what they want. Startups are as impersonal
+as physics.  You have to make something people want, and you prosper
+only to the extent you do.The dangerous thing is, faking does work to some degree on investors.
+If you're super good at sounding like you know what you're talking
+about, you can fool investors for at least one and perhaps even two
+rounds of funding.  But it's not in your interest to.  The company
+is ultimately doomed.  All you're doing is wasting your own time
+riding it down.So stop looking for the trick. There are tricks in startups, as
+there are in any domain, but they are an order of magnitude less
+important than solving the real problem. A founder who knows nothing
+about fundraising but has made something users love will have an
+easier time raising money than one who knows every trick in the
+book but has a flat usage graph. And more importantly, the founder
+who has made something users love is the one who will go on to
+succeed after raising the money.Though in a sense it's bad news in that you're deprived of one of
+your most powerful weapons, I think it's exciting that gaming the
+system stops working when you start a startup.  It's exciting that
+there even exist parts of the world where you win by doing good
+work.  Imagine how depressing the world would be if it were all
+like school and big companies, where you either have to spend a lot
+of time on bullshit things or lose to people who do.
+[3]
+I would
+have been delighted if I'd realized in college that there were parts
+of the real world where gaming the system mattered less than others,
+and a few where it hardly mattered at all.  But there are, and this
+variation is one of the most important things to consider when
+you're thinking about your future.  How do you win in each type of
+work, and what would you like to win by doing? [4]
+All-Consuming
+That brings us to our fourth counterintuitive point: startups are
+all-consuming.  If you start a startup, it will take over your life
+to a degree you cannot imagine.  And if your startup succeeds, it
+will take over your life for a long time: for several years at the
+very least, maybe for a decade, maybe for the rest of your working
+life.  So there is a real opportunity cost here.Larry Page may seem to have an enviable life, but there are aspects
+of it that are unenviable.  Basically at 25 he started running as
+fast as he could and it must seem to him that he hasn't stopped to
+catch his breath since.  Every day new shit happens in the Google
+empire that only the CEO can deal with, and he, as CEO, has to deal
+with it.  If he goes on vacation for even a week, a whole week's
+backlog of shit accumulates.  And he has to bear this uncomplainingly,
+partly because as the company's daddy he can never show fear or
+weakness, and partly because billionaires get less than zero sympathy
+if they talk about having difficult lives.  Which has the strange
+side effect that the difficulty of being a successful startup founder
+is concealed from almost everyone except those who've done it.Y Combinator has now funded several companies that can be called
+big successes, and in every single case the founders say the same
+thing.  It never gets any easier.  The nature of the problems changes.
+You're worrying about construction delays at your London office
+instead of the broken air conditioner in your studio apartment.
+But the total volume of worry never decreases; if anything it
+increases.Starting a successful startup is similar to having kids in that
+it's like a button you push that changes your life irrevocably.
+And while it's truly wonderful having kids, there are a lot of
+things that are easier to do before you have them than after.  Many
+of which will make you a better parent when you do have kids. And
+since you can delay pushing the button for a while, most people in
+rich countries do.Yet when it comes to startups, a lot of people seem to think they're
+supposed to start them while they're still in college.  Are you
+crazy?  And what are the universities thinking?  They go out of
+their way to ensure their students are well supplied with contraceptives,
+and yet they're setting up entrepreneurship programs and startup
+incubators left and right.To be fair, the universities have their hand forced here.  A lot
+of incoming students are interested in startups.  Universities are,
+at least de facto, expected to prepare them for their careers.  So
+students who want to start startups hope universities can teach
+them about startups.  And whether universities can do this or not,
+there's some pressure to claim they can, lest they lose applicants
+to other universities that do.Can universities teach students about startups?  Yes and no.  They
+can teach students about startups, but as I explained before, this
+is not what you need to know.  What you need to learn about are the
+needs of your own users, and you can't do that until you actually
+start the company.
+[5]
+So starting a startup is intrinsically
+something you can only really learn by doing it.  And it's impossible
+to do that in college, for the reason I just explained: startups
+take over your life.  You can't start a startup for real as a
+student, because if you start a startup for real you're not a student
+anymore. You may be nominally a student for a bit, but you won't even
+be that for long.
+[6]Given this dichotomy, which of the two paths should you take?  Be
+a real student and not start a startup, or start a real startup and
+not be a student?  I can answer that one for you. Do not start a
+startup in college.  How to start a startup is just a subset of a
+bigger problem you're trying to solve: how to have a good life.
+And though starting a startup can be part of a good life for a lot
+of ambitious people, age 20 is not the optimal time to do it.
+Starting a startup is like a brutally fast depth-first search.  Most
+people should still be searching breadth-first at 20.You can do things in your early 20s that you can't do as well before
+or after, like plunge deeply into projects on a whim and travel
+super cheaply with no sense of a deadline.  For unambitious people,
+this sort of thing is the dreaded "failure to launch," but for the
+ambitious ones it can be an incomparably valuable sort of exploration.
+If you start a startup at 20 and you're sufficiently successful,
+you'll never get to do it.
+[7]Mark Zuckerberg will never get to bum around a foreign country.  He
+can do other things most people can't, like charter jets to fly him
+to foreign countries. But success has taken a lot of the serendipity
+out of his life. Facebook is running him as much as he's running
+Facebook. And while it can be very cool to be in the grip of a
+project you consider your life's work, there are advantages to
+serendipity too, especially early in life.  Among other things it
+gives you more options to choose your life's work from.There's not even a tradeoff here. You're not sacrificing anything
+if you forgo starting a startup at 20, because you're more likely
+to succeed if you wait.  In the unlikely case that you're 20 and
+one of your side projects takes off like Facebook did, you'll face
+a choice of running with it or not, and it may be reasonable to run
+with it.  But the usual way startups take off is for the founders
+to make them take off, and it's gratuitously stupid to do that at 20.
+Try
+Should you do it at any age?  I realize I've made startups sound
+pretty hard.  If I haven't, let me try again: starting a startup
+is really hard.  What if it's too hard?  How can you tell if you're
+up to this challenge?The answer is the fifth counterintuitive point: you can't tell. Your
+life so far may have given you some idea what your prospects might
+be if you tried to become a mathematician, or a professional football
+player.  But unless you've had a very strange life you haven't done
+much that was like being a startup founder.
+Starting a startup will change you a lot.  So what you're trying
+to estimate is not just what you are, but what you could grow into,
+and who can do that?For the past 9 years it was my job to predict whether people would
+have what it took to start successful startups.  It was easy to
+tell how smart they were, and most people reading this will be over
+that threshold.  The hard part was predicting how tough and ambitious they would become.  There
+may be no one who has more experience at trying to predict that,
+so I can tell you how much an expert can know about it, and the
+answer is: not much.  I learned to keep a completely open mind about
+which of the startups in each batch would turn out to be the stars.The founders sometimes think they know. Some arrive feeling sure
+they will ace Y Combinator just as they've aced every one of the (few,
+artificial, easy) tests they've faced in life so far.  Others arrive
+wondering how they got in, and hoping YC doesn't discover whatever
+mistake caused it to accept them.  But there is little correlation
+between founders' initial attitudes and how well their companies
+do.I've read that the same is true in the military — that the
+swaggering recruits are no more likely to turn out to be really
+tough than the quiet ones. And probably for the same reason: that
+the tests involved are so different from the ones in their previous
+lives.If you're absolutely terrified of starting a startup, you probably
+shouldn't do it.  But if you're merely unsure whether you're up to it, the only way to find out is to try.  Just not now.
+Ideas
+So if you want to start a startup one day, what should you do in
+college?  There are only two things you need initially: an idea and
+cofounders.  And the m.o. for getting both is the same.  Which leads
+to our sixth and last counterintuitive point: that the way to get
+startup ideas is not to try to think of startup ideas.I've written a whole essay on this,
+so I won't repeat it all here.  But the short version is that if
+you make a conscious effort to think of startup ideas, the ideas
+you come up with will not merely be bad, but bad and plausible-sounding,
+meaning you'll waste a lot of time on them before realizing they're
+bad.The way to come up with good startup ideas is to take a step back.
+Instead of making a conscious effort to think of startup ideas,
+turn your mind into the type that startup ideas form in without any
+conscious effort.  In fact, so unconsciously that you don't even
+realize at first that they're startup ideas.This is not only possible, it's how Apple, Yahoo, Google, and
+Facebook all got started.  None of these companies were even meant
+to be companies at first.  They were all just side projects.  The
+best startups almost have to start as side projects, because great
+ideas tend to be such outliers that your conscious mind would reject
+them as ideas for companies.Ok, so how do you turn your mind into the type that startup ideas
+form in unconsciously?  (1) Learn a lot about things that matter,
+then (2) work on problems that interest you (3) with people you
+like and respect.  The third part, incidentally, is how you get
+cofounders at the same time as the idea.The first time I wrote that paragraph, instead of "learn a lot about
+things that matter," I wrote "become good at some technology." But
+that prescription, though sufficient, is too narrow.  What was
+special about Brian Chesky and Joe Gebbia was not that they were
+experts in technology.  They were good at design, and perhaps even
+more importantly, they were good at organizing groups and making
+projects happen.  So you don't have to work on technology per se,
+so long as you work on problems demanding enough to stretch you.What kind of problems are those?  That is very hard to answer in
+the general case.  History is full of examples of young people who
+were working on important problems that no
+one else at the time thought were important, and in particular
+that their parents didn't think were important.  On the other hand,
+history is even fuller of examples of parents who thought their
+kids were wasting their time and who were right.  So how do you
+know when you're working on real stuff?
+[8]I know how I know.  Real problems are interesting, and I am
+self-indulgent in the sense that I always want to work on interesting
+things, even if no one else cares about them (in fact, especially
+if no one else cares about them), and find it very hard to make
+myself work on boring things, even if they're supposed to be
+important.My life is full of case after case where I worked on something just
+because it seemed interesting, and it turned out later to be useful
+in some worldly way.  Y
+Combinator itself was something I only did because it seemed
+interesting. So I seem to have some sort of internal compass that
+helps me out.  But I don't know what other people have in their
+heads. Maybe if I think more about this I can come up with heuristics
+for recognizing genuinely interesting problems, but for the moment
+the best I can offer is the hopelessly question-begging advice that
+if you have a taste for genuinely interesting problems, indulging
+it energetically is the best way to prepare yourself for a startup.
+And indeed, probably also the best way to live.
+[9]But although I can't explain in the general case what counts as an
+interesting problem, I can tell you about a large subset of them.
+If you think of technology as something that's spreading like a
+sort of fractal stain, every moving point on the edge represents
+an interesting problem.  So one guaranteed way to turn your mind
+into the type that has good startup ideas is to get yourself to the
+leading edge of some technology — to cause yourself, as Paul
+Buchheit put it, to "live in the future." When you reach that point,
+ideas that will seem to other people uncannily prescient will seem
+obvious to you.  You may not realize they're startup ideas, but
+you'll know they're something that ought to exist.For example, back at Harvard in the mid 90s a fellow grad student
+of my friends Robert and Trevor wrote his own voice over IP software.
+He didn't mean it to be a startup, and he never tried to turn it
+into one.  He just wanted to talk to his girlfriend in Taiwan without
+paying for long distance calls, and since he was an expert on
+networks it seemed obvious to him that the way to do it was turn
+the sound into packets and ship it over the Internet. He never did
+any more with his software than talk to his girlfriend, but this
+is exactly the way the best startups get started.So strangely enough the optimal thing to do in college if you want
+to be a successful startup founder is not some sort of new, vocational
+version of college focused on "entrepreneurship." It's the classic
+version of college as education for its own sake. If you want to
+start a startup after college, what you should do in college is
+learn powerful things.  And if you have genuine intellectual
+curiosity, that's what you'll naturally tend to do if you just
+follow your own inclinations.
+[10]The component of entrepreneurship that really matters is domain
+expertise.  The way to become Larry Page was to become an expert
+on search. And the way to become an expert on search was to be
+driven by genuine curiosity, not some ulterior motive.At its best, starting a startup is merely an ulterior motive for
+curiosity.  And you'll do it best if you introduce the ulterior
+motive toward the end of the process. So here is the ultimate advice for young would-be startup founders, boiled down to two words: just learn.
+Notes
+[1]
+Some founders listen more than others, and this tends to be a
+predictor of success. One of the things I
+remember about the Airbnbs during YC is how intently they listened.[2]
+In fact, this is one of the reasons startups are possible.  If
+big companies weren't plagued by internal inefficiencies, they'd
+be proportionately more effective, leaving less room for startups.[3]
+In a startup you have to spend a lot of time on schleps, but this sort of work is merely
+unglamorous, not bogus.[4]
+What should you do if your true calling is gaming the system?
+Management consulting.[5]
+The company may not be incorporated, but if you start to get
+significant numbers of users, you've started it, whether you realize
+it yet or not.[6]
+It shouldn't be that surprising that colleges can't teach
+students how to be good startup founders, because they can't teach
+them how to be good employees either.The way universities "teach" students how to be employees is to
+hand off the task to companies via internship programs.  But you
+couldn't do the equivalent thing for startups, because by definition
+if the students did well they would never come back.[7]
+Charles Darwin was 22 when he received an invitation to travel
+aboard the HMS Beagle as a naturalist.  It was only because he was
+otherwise unoccupied, to a degree that alarmed his family, that he
+could accept it. And yet if he hadn't we probably would not know
+his name.[8]
+Parents can sometimes be especially conservative in this
+department.  There are some whose definition of important problems
+includes only those on the critical path to med school.[9]
+I did manage to think of a heuristic for detecting whether you
+have a taste for interesting ideas: whether you find known boring
+ideas intolerable.  Could you endure studying literary theory, or
+working in middle management at a large company?[10]
+In fact, if your goal is to start a startup, you can stick
+even more closely to the ideal of a liberal education than past
+generations have. Back when students focused mainly on getting a
+job after college, they thought at least a little about how the
+courses they took might look to an employer.  And perhaps even
+worse, they might shy away from taking a difficult class lest they
+get a low grade, which would harm their all-important GPA.  Good
+news: users don't care what your GPA
+was.  And I've never heard of investors caring either.  Y Combinator
+certainly never asks what classes you took in college or what grades
+you got in them.
+Thanks to Sam Altman, Paul Buchheit, John Collison, Patrick
+Collison, Jessica Livingston, Robert Morris, Geoff Ralston, and
+Fred Wilson for reading drafts of this.
\ No newline at end of file
diff --git a/docs/prompt b/docs/prompt
index 30404ce..6f133cd 100644
--- a/docs/prompt
+++ b/docs/prompt
@@ -1 +1,222 @@
-TODO
\ No newline at end of file
+# Example App for text summarization & QA using minillmflow
+from minillmflow import Node, BatchNode, Flow, BatchFlow, AsyncNode, AsyncFlow, BatchAsyncFlow
+import os
+
+# 1) Implement a simple LLM helper (OpenAI in this example).
+def call_llm(prompt):
+    # Send a single user prompt to the OpenAI chat API and return the reply text.
+    # Users must set an OpenAI API key; can also load from env var, etc.
+    import openai  # local import: the header of this example only imports os
+    openai.api_key = "YOUR_API_KEY_HERE"
+    r = openai.ChatCompletion.create(model="gpt-4", messages=[{"role": "user", "content": prompt}])
+    return r.choices[0].message.content
+call_LLM = call_llm  # alias for the original spelling; every node below calls call_llm
+
+# 2) Create a shared store (dict) for Node/Flow data exchange.
+#    This can be replaced with a DB or other storage.
+#    Design the structure / schema based on the app requirements.
+shared = {"data": {}, "summary": {}}
+
+# 3) Create a Node that loads data from disk into shared['data'].
+class LoadData(Node):
+    # For compute-intensive operations (here: reading every essay into shared['data']), do them in prep().
+    def prep(self, shared):
+        path = "../data/PaulGrahamEssaysLarge"
+        for filename in os.listdir(path):
+            # Explicit encoding so decoding does not depend on the platform default.
+            with open(os.path.join(path, filename), 'r', encoding='utf-8') as f:
+                shared['data'][filename] = f.read()
+    # If LLM was needed, we'd handle it in exec() (idempotent so it can be retried). Not needed here.
+    def exec(self,shared,prep_res): pass
+    # post() can update shared again or decide the next node (by returning the action).
+    def post(self,shared,prep_res,exec_res): pass
+
+load_data = LoadData()
+# Run the data-loading node once
+load_data.run(shared)
+
+# 4) Create a Node that summarizes a single file using the LLM.
+class SummarizeFile(Node):
+    # Summarize the file named by self.params['filename'] with a single LLM call.
+    def prep(self, shared):
+        # Use self.params (which must remain immutable during prep/exec/post).
+        # Typically, we only store identifying info in params (e.g., filename).
+        return shared['data'][self.params['filename']]
+    def exec(self, shared, prep_res):
+        # prep_res is the file content handed over by prep().
+        return call_llm(f"{prep_res} Respond a summary of above in 10 words")
+    def post(self, shared, prep_res, exec_res):
+        # Store the summary under the same filename key used for the data.
+        filename = self.params['filename']
+        shared["summary"][filename] = exec_res
+
+summarize_file = SummarizeFile()
+# For testing, we set params directly on the node.
+# In real usage, you'd set them in a Flow or BatchFlow.
+summarize_file.set_params({"filename":"addiction.txt"})
+summarize_file.run(shared)
+
+# 5) If data is large, we can apply a map-reduce pattern:
+#    - MapSummaries(BatchNode) => chunk the file and summarize each chunk
+#    - ReduceSummaries(Node)   => combine those chunk-level summaries
+class MapSummaries(BatchNode):
+    # Map step: split one file into fixed-size chunks and summarize each chunk.
+    def prep(self, shared):
+        text = shared['data'][self.params['filename']]
+        step = 10000
+        # Must return an iterable (list or generator) for a BatchNode.
+        return [text[start:start+step] for start in range(0, len(text), step)]
+    def exec(self, shared, prep_res):
+        # Each call receives a single chunk from the list built in prep().
+        return call_llm(f"{prep_res} Respond a summary of above in 10 words")
+    def post(self, shared, prep_res, exec_res):
+        # exec_res is a list of exec() results (summaries for each chunk).
+        numbered = []
+        for idx, chunk_summary in enumerate(exec_res):
+            # Prefix each chunk summary with its 0-based position.
+            numbered.append(f"{idx}. {chunk_summary}")
+        shared["summary"][self.params['filename']] = numbered
+
+class ReduceSummaries(Node):
+    # Reduce step: condense the stored chunk summaries into one final summary.
+    def prep(self, shared):
+        # Retrieve the list of chunk summaries from shared storage
+        key = self.params['filename']
+        return shared["summary"][key]
+    def exec(self, shared, prep_res):
+        # prep_res is interpolated into the prompt through its string form.
+        return call_llm(f"{prep_res} Respond a summary of above in 10 words")
+    def post(self, shared, prep_res, exec_res):
+        # Store the combined summary as the final summary for this file.
+        shared["summary"][self.params['filename']] = exec_res
+        
+map_summaries = MapSummaries()
+reduce_summaries = ReduceSummaries()
+# Link map_summaries to reduce_summaries with an action
+# By default, the action is "default" (when post returns None, it takes "default" action)
+# This is the same as map_summaries - "default" >> reduce_summaries
+map_summaries >> reduce_summaries
+
+# We don't directly call map_summaries.run(shared), 
+# because that alone would process only the map step without reduce.
+
+# 6) Instead, create a Flow that starts from map_summaries (a Node) 
+#    and automatically includes reduce_summaries. 
+#    Note: A Flow can also start from any other Flow or BatchFlow.
+
+
+file_summary_flow = Flow(start=map_summaries)
+# When a flow's params are set, they are recursively propagated to all nodes in the flow
+file_summary_flow.set_params({"filename":"before.txt"})
+file_summary_flow.run(shared)
+
+# 7) Summarize all files using a BatchFlow that reruns file_summary_flow for each file
+class SummarizeAllFiles(BatchFlow):
+    def prep(self, shared):
+        # Return a list of parameters to apply in each flow iteration.
+        # Each individual param will be merged with this node's own params,
+        # allowing nesting of multi-level BatchFlow
+        # (e.g., first-level directory, second-level file).
+        return [dict(filename=name) for name in shared['data']]
+
+summarize_all_files = SummarizeAllFiles(start=file_summary_flow)
+summarize_all_files.run(shared)
+
+
+# 8) QA Agent: Find the most relevant file based on summary with actions
+#    if no question is asked:
+#       (a) end: terminate the flow 
+#    if question is asked:
+#         if relevant file is found:
+#            (b) answer: move to answer node and read the whole file to answer the question
+#         if no relevant file is found:
+#            (c) retry: retry the process to find the relevant file
+class FindRelevantFile(Node):  # asks for a question and picks the file whose summary matches it
+    def prep(self, shared):
+        question = input("Enter a question: ")  # empty input is treated as "no question" below
+        formatted_list = [f"- '{filename}': {shared['summary'][filename]}" 
+                         for filename in shared['summary']]
+        return question, formatted_list
+    def exec(self, shared, prep_res):
+        question, formatted_list = prep_res
+        if not question:
+            return {"think":"no question", "has_relevant":False}
+        # Ask the LLM for a structured YAML output that includes:
+        # - The chain of thought
+        # - Whether any relevant file was found
+        # - The most relevant file if found
+        prompt = f"""Question: {question} 
+Find the most relevant file from: 
+{formatted_list}
+If no relevant file, explain why
+Respond in yaml without additional information:
+think: the question has/has no relevant file ...
+has_relevant: true/false
+most_relevant: filename"""
+        response = call_llm(prompt)
+        import yaml  # imported lazily, only when an LLM response has to be parsed
+        result = yaml.safe_load(response)
+        # Ensure required fields are present (a failing assert raises out of exec)
+        assert "think" in result
+        assert "has_relevant" in result
+        assert "most_relevant" in result if result["has_relevant"] else True  # only required when has_relevant is true
+        return result
+    # Handle errors by returning a default response in case of exception after retries
+    def process_after_fail(self,shared,prep_res,exc):
+        # If not overridden, the default is to throw the exception
+        return {"think":"error finding the file", "has_relevant":False}
+    def post(self, shared, prep_res, exec_res):
+        question, _ = prep_res
+        # Decide what to do next based on the results; the returned string is the action
+        if not question:
+            print(f"No question asked")
+            return "end"
+        if exec_res["has_relevant"]:
+            # Store the question and most relevant file in shared
+            shared["question"] = question
+            shared["relevant_file"] = exec_res['most_relevant']
+            print(f"Relevant file found: {exec_res['most_relevant']}")
+            return "answer"
+        else:
+            print(f"No relevant file found: {exec_res['think']}")
+            return "retry"
+
+class AnswerQuestion(Node):
+    # Answer the stored question from the full text of the chosen file.
+    def prep(self, shared):
+        asked = shared['question']
+        # Read the whole file content
+        return asked, shared['data'][shared['relevant_file']]
+    def exec(self, shared, prep_res):
+        asked, body = prep_res
+        # Prompt layout: question line, file body, then the length instruction.
+        return call_llm(f"""Question: {asked}
+File: {body}
+Answer the question in 50 words""")
+    def post(self, shared, prep_res, exec_res):
+        # Show the reply; returning None means the "default" action
+        # is taken next.
+        print(f"Answer: {exec_res}")
+
+class NoOp(Node):
+    """Node that does nothing; wired below as the target of the "end" action."""
+
+# Configure the QA agent with appropriate transitions and retries
+find_relevant_file = FindRelevantFile(max_retries=3)
+answer_question = AnswerQuestion()
+no_op = NoOp()
+
+# Connect the nodes based on the actions they return
+find_relevant_file - "answer" >> answer_question >> find_relevant_file
+find_relevant_file - "retry" >> find_relevant_file
+find_relevant_file - "end" >> no_op
+
+qa_agent = Flow(start=find_relevant_file)
+qa_agent.run(shared)
+
+
+# Above example demonstrates the use of minillmflow
+# Next, build another app based on the same principles
+# First, given the app's requirements, design the Node/Flow structure
+# Then, design the data structure within shared storage, and how it's updated
+# Finally, implement the Nodes and Flows to achieve the desired functionality
\ No newline at end of file
diff --git a/minillmflow/__init__.py b/minillmflow/__init__.py
index 96305bc..9a5555b 100644
--- a/minillmflow/__init__.py
+++ b/minillmflow/__init__.py
@@ -3,9 +3,9 @@ import asyncio, warnings
 class BaseNode:
     def __init__(self): self.params,self.successors={},{}
     def set_params(self,params): self.params=params
-    def add_successor(self,node,cond="default"):
-        if cond in self.successors: warnings.warn(f"Overwriting successor for condition '{cond}'")
-        self.successors[cond]=node;return node
+    def add_successor(self,node,action="default"):
+        if action in self.successors: warnings.warn(f"Overwriting successor for action '{action}'")
+        self.successors[action]=node;return node
     def prep(self,shared): return None
     def exec(self,shared,prep_res): return None
     def _exec(self,shared,prep_res): return self.exec(shared,prep_res)
@@ -18,13 +18,13 @@ class BaseNode:
         if self.successors: warnings.warn("Node won't run successors. Use a parent Flow instead.")
         return self._run(shared)
     def __rshift__(self,other): return self.add_successor(other)
-    def __sub__(self,cond):
-        if isinstance(cond,str): return _ConditionalTransition(self,cond)
-        raise TypeError("Condition must be a string")
+    def __sub__(self,action):
+        if isinstance(action,str): return _ConditionalTransition(self,action)
+        raise TypeError("Action must be a string")
 
 class _ConditionalTransition:
-    def __init__(self,src,cond): self.src,self.cond=src,cond
-    def __rshift__(self,tgt): return self.src.add_successor(tgt,self.cond)
+    def __init__(self,src,action): self.src,self.action=src,action
+    def __rshift__(self,tgt): return self.src.add_successor(tgt,self.action)
 
 class Node(BaseNode):
     def __init__(self,max_retries=1): 
@@ -42,16 +42,16 @@ class BatchNode(Node):
     def _exec(self,shared,items): return [super(Node,self)._exec(shared,i) for i in items]
 
 class Flow(BaseNode):
-    def __init__(self,start_node):
+    def __init__(self,start):
         super().__init__()
-        self.start_node=start_node
-    def get_next_node(self,curr,cond):
-        nxt=curr.successors.get(cond if cond is not None else "default")
+        self.start=start
+    def get_next_node(self,curr,action):
+        nxt=curr.successors.get(action if action is not None else "default")
         if not nxt and curr.successors: 
-            warnings.warn(f"Flow ends: condition '{cond}' not found in {list(curr.successors)}")
+            warnings.warn(f"Flow ends: action '{action}' not found in {list(curr.successors)}")
         return nxt
     def _exec(self,shared,params=None):
-        curr,p=self.start_node,(params if params else {**self.params})
+        curr,p=self.start,(params if params else {**self.params})
         while curr:
             curr.set_params(p)
             c=curr._run(shared)
@@ -83,7 +83,7 @@ class AsyncNode(Node):
 
 class AsyncFlow(Flow,AsyncNode):
     async def _exec_async(self,shared,params=None):
-        curr,p=self.start_node,(params if params else {**self.params})
+        curr,p=self.start,(params if params else {**self.params})
         while curr:
             curr.set_params(p)
             c=await curr._run_async(shared) if hasattr(curr,"run_async") else curr._run(shared)
diff --git a/setup.py b/setup.py
index 97caa74..3a38bbe 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="minillmflow",
-    version="0.0.0",
+    version="0.0.2",
     packages=find_packages(),
     author="Zachary Huang",
     author_email="zh2408@columbia.edu",
diff --git a/tests/test_async_batch_flow.py b/tests/test_async_batch_flow.py
index e0e911e..c8c7855 100644
--- a/tests/test_async_batch_flow.py
+++ b/tests/test_async_batch_flow.py
@@ -46,7 +46,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = SimpleTestAsyncBatchFlow(start_node=self.process_node)
+        flow = SimpleTestAsyncBatchFlow(start=self.process_node)
         asyncio.run(flow.run_async(shared_storage))
 
         expected_results = {
@@ -66,7 +66,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
             'input_data': {}
         }
 
-        flow = EmptyTestAsyncBatchFlow(start_node=self.process_node)
+        flow = EmptyTestAsyncBatchFlow(start=self.process_node)
         asyncio.run(flow.run_async(shared_storage))
 
         self.assertEqual(shared_storage.get('results', {}), {})
@@ -85,7 +85,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = ErrorTestAsyncBatchFlow(start_node=AsyncErrorNode())
+        flow = ErrorTestAsyncBatchFlow(start=AsyncErrorNode())
         
         with self.assertRaises(ValueError):
             asyncio.run(flow.run_async(shared_storage))
@@ -126,7 +126,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = NestedAsyncBatchFlow(start_node=inner_node)
+        flow = NestedAsyncBatchFlow(start=inner_node)
         asyncio.run(flow.run_async(shared_storage))
 
         expected_results = {
@@ -162,7 +162,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = CustomParamAsyncBatchFlow(start_node=CustomParamAsyncNode())
+        flow = CustomParamAsyncBatchFlow(start=CustomParamAsyncNode())
         asyncio.run(flow.run_async(shared_storage))
 
         expected_results = {
diff --git a/tests/test_async_flow.py b/tests/test_async_flow.py
index bda8ec5..ceb5a9a 100644
--- a/tests/test_async_flow.py
+++ b/tests/test_async_flow.py
@@ -86,14 +86,14 @@ class TestAsyncFlow(unittest.TestCase):
         """
 
         # Create our nodes
-        start_node = AsyncNumberNode(5)
+        start = AsyncNumberNode(5)
         inc_node = AsyncIncrementNode()
 
-        # Chain them: start_node >> inc_node
-        start_node - "number_set" >> inc_node
+        # Chain them: start >> inc_node
+        start - "number_set" >> inc_node
 
-        # Create an AsyncFlow with start_node
-        flow = AsyncFlow(start_node)
+        # Create an AsyncFlow with start
+        flow = AsyncFlow(start)
 
         # We'll run the flow synchronously (which under the hood is asyncio.run())
         shared_storage = {}
@@ -135,15 +135,15 @@ class TestAsyncFlow(unittest.TestCase):
 
         shared_storage = {"value": 10}
 
-        start_node = BranchingAsyncNode()
+        start = BranchingAsyncNode()
         positive_node = PositiveNode()
         negative_node = NegativeNode()
 
         # Condition-based chaining
-        start_node - "positive_branch" >> positive_node
-        start_node - "negative_branch" >> negative_node
+        start - "positive_branch" >> positive_node
+        start - "negative_branch" >> negative_node
 
-        flow = AsyncFlow(start_node)
+        flow = AsyncFlow(start)
         asyncio.run(flow.run_async(shared_storage))
 
         self.assertEqual(shared_storage["path"], "positive", 
diff --git a/tests/test_batch_flow.py b/tests/test_batch_flow.py
index cd2463b..6706175 100644
--- a/tests/test_batch_flow.py
+++ b/tests/test_batch_flow.py
@@ -40,7 +40,7 @@ class TestBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = SimpleTestBatchFlow(start_node=self.process_node)
+        flow = SimpleTestBatchFlow(start=self.process_node)
         flow.run(shared_storage)
 
         expected_results = {
@@ -60,7 +60,7 @@ class TestBatchFlow(unittest.TestCase):
             'input_data': {}
         }
 
-        flow = EmptyTestBatchFlow(start_node=self.process_node)
+        flow = EmptyTestBatchFlow(start=self.process_node)
         flow.run(shared_storage)
 
         self.assertEqual(shared_storage.get('results', {}), {})
@@ -77,7 +77,7 @@ class TestBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = SingleItemBatchFlow(start_node=self.process_node)
+        flow = SingleItemBatchFlow(start=self.process_node)
         flow.run(shared_storage)
 
         expected_results = {
@@ -99,7 +99,7 @@ class TestBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = ErrorTestBatchFlow(start_node=ErrorProcessNode())
+        flow = ErrorTestBatchFlow(start=ErrorProcessNode())
         
         with self.assertRaises(ValueError):
             flow.run(shared_storage)
@@ -136,7 +136,7 @@ class TestBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = NestedBatchFlow(start_node=inner_node)
+        flow = NestedBatchFlow(start=inner_node)
         flow.run(shared_storage)
 
         expected_results = {
@@ -170,7 +170,7 @@ class TestBatchFlow(unittest.TestCase):
             }
         }
 
-        flow = CustomParamBatchFlow(start_node=CustomParamNode())
+        flow = CustomParamBatchFlow(start=CustomParamNode())
         flow.run(shared_storage)
 
         expected_results = {
diff --git a/tests/test_batch_node.py b/tests/test_batch_node.py
index 8f3a145..06f4703 100644
--- a/tests/test_batch_node.py
+++ b/tests/test_batch_node.py
@@ -74,7 +74,7 @@ class TestBatchNode(unittest.TestCase):
         chunk_node >> reduce_node
         
         # Create and run pipeline
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
         pipeline.run(shared_storage)
         
         self.assertEqual(shared_storage['total'], expected_sum)
@@ -95,7 +95,7 @@ class TestBatchNode(unittest.TestCase):
         reduce_node = SumReduceNode()
         
         chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
         pipeline.run(shared_storage)
         
         self.assertEqual(shared_storage['total'], expected_sum)
@@ -116,7 +116,7 @@ class TestBatchNode(unittest.TestCase):
         reduce_node = SumReduceNode()
         
         chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
         pipeline.run(shared_storage)
         
         self.assertEqual(shared_storage['total'], expected_sum)
@@ -136,7 +136,7 @@ class TestBatchNode(unittest.TestCase):
         reduce_node = SumReduceNode()
         
         chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
         pipeline.run(shared_storage)
         
         self.assertEqual(shared_storage['total'], expected_sum)
@@ -153,7 +153,7 @@ class TestBatchNode(unittest.TestCase):
         reduce_node = SumReduceNode()
         
         chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
         pipeline.run(shared_storage)
         
         self.assertEqual(shared_storage['total'], 0)
diff --git a/tests/test_flow_basic.py b/tests/test_flow_basic.py
index 3f59744..a09d51c 100644
--- a/tests/test_flow_basic.py
+++ b/tests/test_flow_basic.py
@@ -45,7 +45,7 @@ class TestNode(unittest.TestCase):
     def test_single_number(self):
         shared_storage = {}
         start = NumberNode(5)
-        pipeline = Flow(start_node=start)
+        pipeline = Flow(start=start)
         pipeline.run(shared_storage)
         self.assertEqual(shared_storage['current'], 5)
 
@@ -65,7 +65,7 @@ class TestNode(unittest.TestCase):
         # Chain them in sequence using the >> operator
         n1 >> n2 >> n3
 
-        pipeline = Flow(start_node=n1)
+        pipeline = Flow(start=n1)
         pipeline.run(shared_storage)
 
         self.assertEqual(shared_storage['current'], 16)
@@ -94,7 +94,7 @@ class TestNode(unittest.TestCase):
         check - "positive" >> add_if_positive
         check - "negative" >> add_if_negative
 
-        pipeline = Flow(start_node=start)
+        pipeline = Flow(start=start)
         pipeline.run(shared_storage)
 
         self.assertEqual(shared_storage['current'], 15)
@@ -118,7 +118,7 @@ class TestNode(unittest.TestCase):
         check - "positive" >> add_if_positive
         check - "negative" >> add_if_negative
 
-        pipeline = Flow(start_node=start)
+        pipeline = Flow(start=start)
         pipeline.run(shared_storage)
 
         # Should have gone down the 'negative' branch
@@ -145,7 +145,7 @@ class TestNode(unittest.TestCase):
         # Attach a no-op node on the negative branch to avoid warning
         check - 'negative' >> no_op
 
-        pipeline = Flow(start_node=n1)
+        pipeline = Flow(start=n1)
         pipeline.run(shared_storage)
 
         # final result should be -2: (10 -> 7 -> 4 -> 1 -> -2)
diff --git a/tests/test_flow_composition.py b/tests/test_flow_composition.py
index bac8fa0..84403b0 100644
--- a/tests/test_flow_composition.py
+++ b/tests/test_flow_composition.py
@@ -35,21 +35,21 @@ class TestFlowComposition(unittest.TestCase):
     def test_flow_as_node(self):
         """
         1) Create a Flow (f1) starting with NumberNode(5), then AddNode(10), then MultiplyNode(2).
-        2) Create a second Flow (f2) whose start_node is f1.
+        2) Create a second Flow (f2) whose start is f1.
         3) Create a wrapper Flow (f3) that contains f2 to ensure proper execution.
         Expected final result in shared_storage['current']: (5 + 10) * 2 = 30.
         """
         shared_storage = {}
         
         # Inner flow f1
-        f1 = Flow(start_node=NumberNode(5))
+        f1 = Flow(start=NumberNode(5))
         f1 >> AddNode(10) >> MultiplyNode(2)
         
         # f2 starts with f1
-        f2 = Flow(start_node=f1)
+        f2 = Flow(start=f1)
         
         # Wrapper flow f3 to ensure proper execution
-        f3 = Flow(start_node=f2)
+        f3 = Flow(start=f2)
         f3.run(shared_storage)
         
         self.assertEqual(shared_storage['current'], 30)
@@ -65,15 +65,15 @@ class TestFlowComposition(unittest.TestCase):
         shared_storage = {}
         
         # Build the inner flow
-        inner_flow = Flow(start_node=NumberNode(5))
+        inner_flow = Flow(start=NumberNode(5))
         inner_flow >> AddNode(3)
         
-        # Build the middle flow, whose start_node is the inner flow
-        middle_flow = Flow(start_node=inner_flow)
+        # Build the middle flow, whose start is the inner flow
+        middle_flow = Flow(start=inner_flow)
         middle_flow >> MultiplyNode(4)
         
         # Wrapper flow to ensure proper execution
-        wrapper_flow = Flow(start_node=middle_flow)
+        wrapper_flow = Flow(start=middle_flow)
         wrapper_flow.run(shared_storage)
         
         self.assertEqual(shared_storage['current'], 32)
@@ -91,16 +91,16 @@ class TestFlowComposition(unittest.TestCase):
         # flow1
         numbernode = NumberNode(10)
         numbernode >> AddNode(10)
-        flow1 = Flow(start_node=numbernode)
+        flow1 = Flow(start=numbernode)
 
         # flow2
-        flow2 = Flow(start_node=MultiplyNode(2))
+        flow2 = Flow(start=MultiplyNode(2))
 
         # Chain flow1 to flow2
         flow1 >> flow2
 
         # Wrapper flow to ensure proper execution
-        wrapper_flow = Flow(start_node=flow1)
+        wrapper_flow = Flow(start=flow1)
         wrapper_flow.run(shared_storage)
         
         self.assertEqual(shared_storage['current'], 40)