add examples

2024-12-27 05:29:24 +00:00 · 2024-12-27 05:29:24 +00:00 · 0dde58d684
parent 993e024ec4
commit 0dde58d684
16 changed files with 1753 additions and 59 deletions
--- a/.gitignore
+++ b/.gitignore
@ -71,4 +71,5 @@ htmlcov/
 *.temp
-test.ipynb
+test.ipynb
 .pytest_cache/
--- a/cookbook/demo.ipynb
+++ b/cookbook/demo.ipynb
@ -0,0 +1,267 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No relevant file found: the question has no relevant file because while some files discuss startups, none specifically address how to find or generate startup ideas\n",
      "No question asked\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'default'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Example App for text summarization & QA using minillmflow\n",
    "from minillmflow import Node, BatchNode, Flow, BatchFlow, AsyncNode, AsyncFlow, BatchAsyncFlow\n",
    "import os\n",
    "\n",
    "# 1) Implement a simple LLM helper (OpenAI in this example).\n",
    "def call_llm(prompt):\n",
    "    # Users must set an OpenAI API key; can also load from env var, etc.\n",
    "    openai.api_key = \"YOUR_API_KEY_HERE\"\n",
    "    r = openai.ChatCompletion.create(\n",
    "        model=\"gpt-4\",\n",
    "        messages=[{\"role\": \"user\", \"content\": prompt}]\n",
    "    )\n",
    "    return r.choices[0].message.content\n",
    "\n",
    "# 2) Create a shared store (dict) for Node/Flow data exchange.\n",
    "#    This can be replaced with a DB or other storage.\n",
    "#    Design the structure / schema based on the app requirements.\n",
    "shared = {\"data\": {}, \"summary\": {}}\n",
    "\n",
    "# 3) Create a Node that loads data from disk into shared['data'].\n",
    "class LoadData(Node):\n",
    "    # For compute-intensive operations, do them in prep().\n",
    "    def prep(self, shared):\n",
    "        path = \"../data/PaulGrahamEssaysLarge\"\n",
    "        for filename in os.listdir(path):\n",
    "            with open(os.path.join(path, filename), 'r') as f:\n",
    "                shared['data'][filename] = f.read()\n",
    "    # If LLM was needed, we'd handle it in exec(). Not needed here.\n",
    "    # (idempotent so it can be retried if needed)\n",
    "    def exec(self,shared,prep_res): pass \n",
    "    # post() can update shared again or decide the next node (by returning the action).\n",
    "    def post(self,shared,prep_res,exec_res): pass \n",
    "\n",
    "load_data = LoadData()\n",
    "# Run the data-loading node once\n",
    "load_data.run(shared)\n",
    "\n",
    "# 4) Create a Node that summarizes a single file using the LLM.\n",
    "class SummarizeFile(Node):\n",
    "    def prep(self, shared):\n",
    "        # Use self.params (which must remain immutable during prep/exec/post).\n",
    "        # Typically, we only store identifying info in params (e.g., filename).\n",
    "        content = shared['data'][self.params['filename']]\n",
    "        return content\n",
    "    def exec(self, shared, prep_res):\n",
    "        content = prep_res\n",
    "        prompt = f\"{content} Respond a summary of above in 10 words\"\n",
    "        summary = call_llm(prompt)\n",
    "        return summary\n",
    "    def post(self, shared, prep_res, exec_res):\n",
    "        shared[\"summary\"][self.params['filename']] = exec_res\n",
    "\n",
    "summarize_file = SummarizeFile()\n",
    "# For testing, we set params directly on the node.\n",
    "# In real usage, you'd set them in a Flow or BatchFlow.\n",
    "summarize_file.set_params({\"filename\":\"addiction.txt\"})\n",
    "summarize_file.run(shared)\n",
    "\n",
    "# 5) If data is large, we can apply a map-reduce pattern:\n",
    "#    - MapSummaries(BatchNode) => chunk the file and summarize each chunk\n",
    "#    - ReduceSummaries(Node)   => combine those chunk-level summaries\n",
    "class MapSummaries(BatchNode):\n",
    "    def prep(self, shared):\n",
    "        content = shared['data'][self.params['filename']]\n",
    "        chunk_size = 10000\n",
    "        chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]\n",
    "        # Must return an iterable (list or generator) for a BatchNode.\n",
    "        return chunks\n",
    "    def exec(self, shared, prep_res):\n",
    "        # Each iteration of prep_res corresponds to a single chunk.\n",
    "        chunk = prep_res\n",
    "        prompt = f\"{chunk} Respond a summary of above in 10 words\"\n",
    "        summary = call_llm(prompt)\n",
    "        return summary\n",
    "    def post(self, shared, prep_res, exec_res):\n",
    "        # exec_res is a list of exec() results (summaries for each chunk).\n",
    "        combined_summary = [f\"{i}. {summary}\" for i, summary in enumerate(exec_res)]\n",
    "        shared[\"summary\"][self.params['filename']] = combined_summary\n",
    "\n",
    "class ReduceSummaries(Node):\n",
    "    def prep(self, shared):\n",
    "        # Retrieve the list of chunk summaries from shared storage\n",
    "        return shared[\"summary\"][self.params['filename']]\n",
    "    def exec(self, shared, prep_res):\n",
    "        combined_summary = prep_res\n",
    "        prompt = f\"{combined_summary} Respond a summary of above in 10 words\"\n",
    "        summary = call_llm(prompt)\n",
    "        return summary\n",
    "    def post(self, shared, prep_res, exec_res):\n",
    "        # Store the combined summary as the final summary for this file.\n",
    "        shared[\"summary\"][self.params['filename']] = exec_res\n",
    "        \n",
    "map_summaries = MapSummaries()\n",
    "reduce_summaries = ReduceSummaries()\n",
    "# Link map_summaries to reduce_summaries with an action\n",
    "# By default, the action is \"default\" (when post returns None, it takes \"default\" action)\n",
    "# This is the same as map_summaries - \"default\" >> reduce_summaries\n",
    "map_summaries >> reduce_summaries\n",
    "\n",
    "# We don't directly call map_summaries.run(shared), \n",
    "# because that alone would process only the map step without reduce.\n",
    "\n",
    "# 6) Instead, create a Flow that starts from map_summaries (a Node) \n",
    "#    and automatically includes reduce_summaries. \n",
    "#    Note: A Flow can also start from any other Flow or BatchFlow.\n",
    "\n",
    "\n",
    "file_summary_flow = Flow(start=map_summaries)\n",
    "# When a flow's params are set, they are recursively applied to all nodes in the flow\n",
    "file_summary_flow.set_params({\"filename\":\"before.txt\"})\n",
    "file_summary_flow.run(shared)\n",
    "\n",
    "# 7) Summarize all files using a BatchFlow that reruns file_summary_flow for each file\n",
    "class SummarizeAllFiles(BatchFlow):\n",
    "    def prep(self, shared):\n",
    "        # Return a list of parameters to apply in each flow iteration.\n",
    "        # Each individual param will be merged with this node's own params,\n",
    "        # allowing nesting of multi-level BatchFlows.\n",
    "        # E.g., first-level directory, second-level file.\n",
    "        return [{\"filename\":filename} for filename in shared['data']]\n",
    "\n",
    "summarize_all_files = SummarizeAllFiles(start=file_summary_flow)\n",
    "summarize_all_files.run(shared)\n",
    "\n",
    "\n",
    "# 8) QA Agent: Find the most relevant file based on summary with actions\n",
    "#    if no question is asked:\n",
    "#       (a) end: terminate the flow \n",
    "#    if question is asked:\n",
    "#         if relevant file is found:\n",
    "#            (b) answer: move to answer node and read the whole file to answer the question\n",
    "#         if no relevant file is found:\n",
    "#            (c) retry: retry the process to find the relevant file\n",
    "class FindRelevantFile(Node):\n",
    "    def prep(self, shared):\n",
    "        question = input(\"Enter a question: \")\n",
    "        formatted_list = [f\"- '{filename}': {shared['summary'][filename]}\" \n",
    "                         for filename in shared['summary']]\n",
    "        return question, formatted_list\n",
    "    def exec(self, shared, prep_res):\n",
    "        question, formatted_list = prep_res\n",
    "        if not question:\n",
    "            return {\"think\":\"no question\", \"has_relevant\":False}\n",
    "        # Provide a structured YAML output that includes:\n",
    "        # - The chain of thought\n",
    "        # - Whether any relevant file was found\n",
    "        # - The most relevant file if found\n",
    "        prompt = f\"\"\"Question: {question} \n",
    "Find the most relevant file from: \n",
    "{formatted_list}\n",
    "If no relevant file, explain why\n",
    "Respond in yaml without additional information:\n",
    "think: the question has/has no relevant file ...\n",
    "has_relevant: true/false\n",
    "most_relevant: filename\"\"\"\n",
    "        response = call_llm(prompt)\n",
    "        import yaml\n",
    "        result = yaml.safe_load(response)\n",
    "        # Ensure required fields are present\n",
    "        assert \"think\" in result\n",
    "        assert \"has_relevant\" in result\n",
    "        assert \"most_relevant\" in result if result[\"has_relevant\"] else True\n",
    "        return result\n",
    "    # handle errors by returning a default response in case of exception after retries\n",
    "    def process_after_fail(self,shared,prep_res,exc):\n",
    "        # if not overridden, the default is to throw the exception\n",
    "        return {\"think\":\"error finding the file\", \"has_relevant\":False}\n",
    "    def post(self, shared, prep_res, exec_res):\n",
    "        question, _ = prep_res\n",
    "        # Decide what to do next based on the results\n",
    "        if not question:\n",
    "            print(f\"No question asked\")\n",
    "            return \"end\"\n",
    "        if exec_res[\"has_relevant\"]:\n",
    "            # Store the question and most relevant file in shared\n",
    "            shared[\"question\"] = question\n",
    "            shared[\"relevant_file\"] = exec_res['most_relevant']\n",
    "            print(f\"Relevant file found: {exec_res['most_relevant']}\")\n",
    "            return \"answer\"\n",
    "        else:\n",
    "            print(f\"No relevant file found: {exec_res['think']}\")\n",
    "            return \"retry\"\n",
    "\n",
    "class AnswerQuestion(Node):\n",
    "    def prep(self, shared):\n",
    "        question = shared['question']\n",
    "        relevant_file = shared['relevant_file']\n",
    "        # Read the whole file content\n",
    "        file_content = shared['data'][relevant_file]\n",
    "        return question, file_content\n",
    "    def exec(self, shared, prep_res):\n",
    "        question, file_content = prep_res\n",
    "        prompt = f\"\"\"Question: {question}\n",
    "File: {file_content}\n",
    "Answer the question in 50 words\"\"\"\n",
    "        response = call_llm(prompt)\n",
    "        return response\n",
    "    def post(self, shared, prep_res, exec_res):\n",
    "        print(f\"Answer: {exec_res}\")\n",
    "\n",
    "class NoOp(Node):\n",
    "    pass\n",
    "\n",
    "# Configure the QA agent with appropriate transitions and retries\n",
    "find_relevant_file = FindRelevantFile(max_retries=3)\n",
    "answer_question = AnswerQuestion()\n",
    "no_op = NoOp()\n",
    "\n",
    "# Connect the nodes based on the actions they return\n",
    "find_relevant_file - \"answer\" >> answer_question >> find_relevant_file\n",
    "find_relevant_file - \"retry\" >> find_relevant_file\n",
    "find_relevant_file - \"end\" >> no_op\n",
    "\n",
    "qa_agent = Flow(start=find_relevant_file)\n",
    "qa_agent.run(shared)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/data/PaulGrahamEssaysLarge/addiction.txt
+++ b/data/PaulGrahamEssaysLarge/addiction.txt
@ -0,0 +1,116 @@
 July 2010 What hard liquor, cigarettes, heroin, and crack have in common is
 that they're all more concentrated forms of less addictive predecessors.
 Most if not all the things we describe as addictive are.  And the
 scary thing is, the process that created them is accelerating. We wouldn't want to stop it.  It's the same process that cures
 diseases: technological progress.  Technological progress means
 making things do more of what we want.  When the thing we want is
 something we want to want, we consider technological progress good.
 If some new technique makes solar cells x% more efficient, that
 seems strictly better.  When progress concentrates something we
 don't want to want—when it transforms opium into heroin—it seems
 bad.  But it's the same process at work.
 [1] No one doubts this process is accelerating, which means increasing
 numbers of things we like will be transformed into things we like
 too much.
 [2] As far as I know there's no word for something we like too much.
 The closest is the colloquial sense of "addictive." That usage has
 become increasingly common during my lifetime.  And it's clear why:
 there are an increasing number of things we need it for.  At the
 extreme end of the spectrum are crack and meth.  Food has been
 transformed by a combination of factory farming and innovations in
 food processing into something with way more immediate bang for the
 buck, and you can see the results in any town in America.  Checkers
 and solitaire have been replaced by World of Warcraft and FarmVille.
 TV has become much more engaging, and even so it can't compete with Facebook. The world is more addictive than it was 40 years ago.   And unless
 the forms of technological progress that produced these things are
 subject to different laws than technological progress in general,
 the world will get more addictive in the next 40 years than it did
 in the last 40. The next 40 years will bring us some wonderful things.  I don't
 mean to imply they're all to be avoided.  Alcohol is a dangerous
 drug, but I'd rather live in a world with wine than one without.
 Most people can coexist with alcohol; but you have to be careful.
 More things we like will mean more things we have to be careful
 about. Most people won't, unfortunately.  Which means that as the world
 becomes more addictive, the two senses in which one can live a
 normal life will be driven ever further apart.  One sense of "normal"
 is statistically normal: what everyone else does.  The other is the
 sense we mean when we talk about the normal operating range of a
 piece of machinery: what works best. These two senses are already quite far apart.  Already someone
 trying to live well would seem eccentrically abstemious in most of
 the US.  That phenomenon is only going to become more pronounced.
 You can probably take it as a rule of thumb from now on that if
 people don't think you're weird, you're living badly. Societies eventually develop antibodies to addictive new things.
 I've seen that happen with cigarettes.  When cigarettes first
 appeared, they spread the way an infectious disease spreads through
 a previously isolated population.  Smoking rapidly became a
 (statistically) normal thing.  There were ashtrays everywhere.  We
 had ashtrays in our house when I was a kid, even though neither of
 my parents smoked.  You had to for guests. As knowledge spread about the dangers of smoking, customs changed.
 In the last 20 years, smoking has been transformed from something
 that seemed totally normal into a rather seedy habit: from something
 movie stars did in publicity shots to something small huddles of
 addicts do outside the doors of office buildings.  A lot of the
 change was due to legislation, of course, but the legislation
 couldn't have happened if customs hadn't already changed. It took a while though—on the order of 100 years.  And unless the
 rate at which social antibodies evolve can increase to match the
 accelerating rate at which technological progress throws off new
 addictions, we'll be increasingly unable to rely on customs to
 protect us.
 [3]
 Unless we want to be canaries in the coal mine
 of each new addiction—the people whose sad example becomes a
 lesson to future generations—we'll have to figure out for ourselves
 what to avoid and how.  It will actually become a reasonable strategy
 (or a more reasonable strategy) to suspect 
 everything new. In fact, even that won't be enough.  We'll have to worry not just
 about new things, but also about existing things becoming more
 addictive.  That's what bit me.  I've avoided most addictions, but
 the Internet got me because it became addictive while I was using
 it.
 [4] Most people I know have problems with Internet addiction.  We're
 all trying to figure out our own customs for getting free of it.
 That's why I don't have an iPhone, for example; the last thing I
 want is for the Internet to follow me out into the world.
 [5]
 My latest trick is taking long hikes.  I used to think running was a
 better form of exercise than hiking because it took less time.  Now
 the slowness of hiking seems an advantage, because the longer I
 spend on the trail, the longer I have to think without interruption. Sounds pretty eccentric, doesn't it?  It always will when you're
 trying to solve problems where there are no customs yet to guide
 you.  Maybe I can't plead Occam's razor; maybe I'm simply eccentric.
 But if I'm right about the acceleration of addictiveness, then this
 kind of lonely squirming to avoid it will increasingly be the fate
 of anyone who wants to get things done.  We'll increasingly be
 defined by what we say no to.
 Notes [1]
 Could you restrict technological progress to areas where you
 wanted it?  Only in a limited way, without becoming a police state.
 And even then your restrictions would have undesirable side effects.
 "Good" and "bad" technological progress aren't sharply differentiated,
 so you'd find you couldn't slow the latter without also slowing the
 former.  And in any case, as Prohibition and the "war on drugs"
 show, bans often do more harm than good. [2]
 Technology has always been accelerating.  By Paleolithic
 standards, technology evolved at a blistering pace in the Neolithic
 period. [3]
 Unless we mass produce social customs.  I suspect the recent
 resurgence of evangelical Christianity in the US is partly a reaction
 to drugs.  In desperation people reach for the sledgehammer; if
 their kids won't listen to them, maybe they'll listen to God.  But
 that solution has broader consequences than just getting kids to
 say no to drugs.  You end up saying no to 
 science as well.
 I worry we may be heading for a future in which only a few people
 plot their own itinerary through no-land, while everyone else books
 a package tour.  Or worse still, has one booked for them by the
 government. [4]
 People commonly use the word "procrastination" to describe
 what they do on the Internet.  It seems to me too mild to describe
 what's happening as merely not-doing-work.  We don't call it
 procrastination when someone gets drunk instead of working. [5]
 Several people have told me they like the iPad because it
 lets them bring the Internet into situations where a laptop would
 be too conspicuous.  In other words, it's a hip flask.  (This is
 true of the iPhone too, of course, but this advantage isn't as
 obvious because it reads as a phone, and everyone's used to those.) Thanks to Sam Altman, Patrick Collison, Jessica Livingston, and
 Robert Morris for reading drafts of this.
--- a/data/PaulGrahamEssaysLarge/aord.txt
+++ b/data/PaulGrahamEssaysLarge/aord.txt
@ -0,0 +1,126 @@
 October 2015 When I talk to a startup that's been operating for more than 8 or
 9 months, the first thing I want to know is almost always the same.
 Assuming their expenses remain constant and their revenue growth
 is what it has been over the last several months, do they make it to
 profitability on the money they have left?  Or to put it more
 dramatically, by default do they live or die? The startling thing is how often the founders themselves don't know.
 Half the founders I talk to don't know whether they're default alive
 or default dead. If you're among that number, Trevor Blackwell has made a handy
 calculator you can use to find out. The reason I want to know first whether a startup is default alive
 or default dead is that the rest of the conversation depends on the
 answer.  If the company is default alive, we can talk about ambitious
 new things they could do.  If it's default dead, we probably need
 to talk about how to save it.  We know the current trajectory ends
 badly.  How can they get off that trajectory? Why do so few founders know whether they're default alive or default
 dead?  Mainly, I think, because they're not used to asking that.
 It's not a question that makes sense to ask early on, any more than
 it makes sense to ask a 3 year old how he plans to support
 himself.  But as the company grows older, the question switches from
 meaningless to critical.  That kind of switch often takes people
 by surprise. I propose the following solution: instead of starting to ask too
 late whether you're default alive or default dead, start asking too
 early.  It's hard to say precisely when the question switches
 polarity.  But it's probably not that dangerous to start worrying
 too early that you're default dead, whereas it's very dangerous to
 start worrying too late. The reason is a phenomenon I wrote about earlier: the
 fatal pinch.
 The fatal pinch is default dead + slow growth + not enough
 time to fix it.  And the way founders end up in it is by not realizing
 that's where they're headed. There is another reason founders don't ask themselves whether they're
 default alive or default dead: they assume it will be easy to raise
 more money.  But that assumption is often false, and worse still, the
 more you depend on it, the falser it becomes. Maybe it will help to separate facts from hopes. Instead of thinking
 of the future with vague optimism, explicitly separate the components.
 Say "We're default dead, but we're counting on investors to save
 us." Maybe as you say that, it will set off the same alarms in your
 head that it does in mine.  And if you set off the alarms sufficiently
 early, you may be able to avoid the fatal pinch. It would be safe to be default dead if you could count on investors
 saving you.  As a rule their interest is a function of
 growth.  If you have steep revenue growth, say over 5x a year, you
 can start to count on investors being interested even if you're not
 profitable.
 [1]
 But investors are so fickle that you can never
 do more than start to count on them.  Sometimes something about your
 business will spook investors even if your growth is great.  So no
 matter how good your growth is, you can never safely treat fundraising
 as more than a plan A. You should always have a plan B as well: you
 should know (as in write down) precisely what you'll need to do to
 survive if you can't raise more money, and precisely when you'll 
 have to switch to plan B if plan A isn't working. In any case, growing fast versus operating cheaply is far from the
 sharp dichotomy many founders assume it to be.  In practice there
 is surprisingly little connection between how much a startup spends
 and how fast it grows.  When a startup grows fast, it's usually
 because the product hits a nerve, in the sense of hitting some big
 need straight on.  When a startup spends a lot, it's usually because
 the product is expensive to develop or sell, or simply because
 they're wasteful. If you're paying attention, you'll be asking at this point not just
 how to avoid the fatal pinch, but how to avoid being default dead.
 That one is easy: don't hire too fast.  Hiring too fast is by far
 the biggest killer of startups that raise money.
 [2] Founders tell themselves they need to hire in order to grow.  But
 most err on the side of overestimating this need rather than
 underestimating it.  Why?  Partly because there's so much work to
 do.  Naive founders think that if they can just hire enough
 people, it will all get done.  Partly because successful startups have
 lots of employees, so it seems like that's what one does in order
 to be successful.  In fact the large staffs of successful startups
 are probably more the effect of growth than the cause.  And
 partly because when founders have slow growth they don't want to
 face what is usually the real reason: the product is not appealing
 enough. Plus founders who've just raised money are often encouraged to
 overhire by the VCs who funded them.  Kill-or-cure strategies are
 optimal for VCs because they're protected by the portfolio effect.
 VCs want to blow you up, in one sense of the phrase or the other.
 But as a founder your incentives are different.  You want above all
 to survive.
 [3] Here's a common way startups die.  They make something moderately
 appealing and have decent initial growth. They raise their first
 round fairly easily, because the founders seem smart and the idea
 sounds plausible. But because the product is only moderately
 appealing, growth is ok but not great.  The founders convince
 themselves that hiring a bunch of people is the way to boost growth.
 Their investors agree.  But (because the product is only moderately
 appealing) the growth never comes.  Now they're rapidly running out
 of runway.  They hope further investment will save them. But because
 they have high expenses and slow growth, they're now unappealing
 to investors. They're unable to raise more, and the company dies. What the company should have done is address the fundamental problem:
 that the product is only moderately appealing.  Hiring people is
 rarely the way to fix that.  More often than not it makes it harder.
 At this early stage, the product needs to evolve more than to be
 "built out," and that's usually easier with fewer people.
 [4] Asking whether you're default alive or default dead may save you
 from this.  Maybe the alarm bells it sets off will counteract the
 forces that push you to overhire.  Instead you'll be compelled to
 seek growth in other ways. For example, by doing
 things that don't scale, or by redesigning the product in the
 way only founders can.
 And for many if not most startups, these paths to growth will be
 the ones that actually work. Airbnb waited 4 months after raising money at the end of Y Combinator
 before they hired their first employee.  In the meantime the founders
 were terribly overworked.  But they were overworked evolving Airbnb
 into the astonishingly successful organism it is now. Notes [1]
 Steep usage growth will also interest investors.  Revenue
 will ultimately be a constant multiple of usage, so x% usage growth
 predicts x% revenue growth.  But in practice investors discount
 merely predicted revenue, so if you're measuring usage you need a
 higher growth rate to impress investors. [2]
 Startups that don't raise money are saved from hiring too
 fast because they can't afford to. But that doesn't mean you should
 avoid raising money in order to avoid this problem, any more than
 that total abstinence is the only way to avoid becoming an alcoholic. [3]
 I would not be surprised if VCs' tendency to push founders
 to overhire is not even in their own interest.  They don't know how
 many of the companies that get killed by overspending might have
 done well if they'd survived.  My guess is a significant number. [4]
 After reading a draft, Sam Altman wrote: "I think you should make the hiring point more strongly.  I think
 it's roughly correct to say that YC's most successful companies
 have never been the fastest to hire, and one of the marks of a great
 founder is being able to resist this urge." Paul Buchheit adds: "A related problem that I see a lot is premature scaling—founders
 take a small business that isn't really working (bad unit economics,
 typically) and then scale it up because they want impressive growth
 numbers. This is similar to over-hiring in that it makes the business
 much harder to fix once it's big, plus they are bleeding cash really
 fast."
 Thanks to Sam Altman, Paul Buchheit, Joe Gebbia, Jessica Livingston,
 and Geoff Ralston for reading drafts of this.
--- a/data/PaulGrahamEssaysLarge/apple.txt
+++ b/data/PaulGrahamEssaysLarge/apple.txt
@ -0,0 +1,201 @@
 Want to start a startup?  Get funded by
 Y Combinator.
 November 2009
 I don't think Apple realizes how badly the App Store approval process
 is broken.  Or rather, I don't think they realize how much it matters
 that it's broken. The way Apple runs the App Store has harmed their reputation with
 programmers more than anything else they've ever done. 
 Their reputation with programmers used to be great.
 It used to be the most common complaint you heard
 about Apple was that their fans admired them too uncritically.
 The App Store has changed that.  Now a lot of programmers
 have started to see Apple as evil. How much of the goodwill Apple once had with programmers have they
 lost over the App Store?  A third?  Half?  And that's just so far.
 The App Store is an ongoing karma leak.
 * * *
 How did Apple get into this mess?  Their fundamental problem is
 that they don't understand software. They treat iPhone apps the way they treat the music they sell through
 iTunes.  Apple is the channel; they own the user; if you want to
 reach users, you do it on their terms. The record labels agreed,
 reluctantly.  But this model doesn't work for software.  It doesn't
 work for an intermediary to own the user.  The software business
 learned that in the early 1980s, when companies like VisiCorp showed
 that although the words "software" and "publisher" fit together,
 the underlying concepts don't.  Software isn't like music or books.
 It's too complicated for a third party to act as an intermediary
 between developer and user.   And yet that's what Apple is trying
 to be with the App Store: a software publisher.  And a particularly
 overreaching one at that, with fussy tastes and a rigidly enforced
 house style.If software publishing didn't work in 1980, it works even less now
 that software development has evolved from a small number of big
 releases to a constant stream of small ones.  But Apple doesn't
 understand that either.  Their model of product development derives
 from hardware.  They work on something till they think it's finished,
 then they release it.  You have to do that with hardware, but because
 software is so easy to change, its design can benefit from evolution.
 The standard way to develop applications now is to launch fast and
 iterate.  Which means it's a disaster to have long, random delays
 each time you release a new version.Apparently Apple's attitude is that developers should be more careful
 when they submit a new version to the App Store.  They would say
 that.  But powerful as they are, they're not powerful enough to
 turn back the evolution of technology.  Programmers don't use
 launch-fast-and-iterate out of laziness.  They use it because it
 yields the best results.  By obstructing that process, Apple is
 making them do bad work, and programmers hate that as much as Apple
 would.How would Apple like it if when they discovered a serious bug in
 OS X, instead of releasing a software update immediately, they had
 to submit their code to an intermediary who sat on it for a month
 and then rejected it because it contained an icon they didn't like?By breaking software development, Apple gets the opposite of what
 they intended: the version of an app currently available in the App
 Store tends to be an old and buggy one.  One developer told me:
  As a result of their process, the App Store is full of half-baked
  applications. I make a new version almost every day that I release
  to beta users. The version on the App Store feels old and crappy.
  I'm sure that a lot of developers feel this way: One emotion is
  "I'm not really proud about what's in the App Store", and it's
  combined with the emotion "Really, it's Apple's fault."
 Another wrote:
  I believe that they think their approval process helps users by
  ensuring quality.  In reality, bugs like ours get through all the
  time and then it can take 4-8 weeks to get that bug fix approved,
  leaving users to think that iPhone apps sometimes just don't work.
  Worse for Apple, these apps work just fine on other platforms
  that have immediate approval processes.
 Actually I suppose Apple has a third misconception: that all the
 complaints about App Store approvals are not a serious problem.
 They must hear developers complaining.  But partners and suppliers
 are always complaining.  It would be a bad sign if they weren't;
 it would mean you were being too easy on them.  Meanwhile the iPhone
 is selling better than ever.  So why do they need to fix anything?They get away with maltreating developers, in the short term, because
 they make such great hardware.  I just bought a new 27" iMac a
 couple days ago.  It's fabulous.  The screen's too shiny, and the
 disk is surprisingly loud, but it's so beautiful that you can't
 make yourself care.So I bought it, but I bought it, for the first time, with misgivings.
 I felt the way I'd feel buying something made in a country with a
 bad human rights record.  That was new.  In the past when I bought
 things from Apple it was an unalloyed pleasure.  Oh boy!  They make
 such great stuff.  This time it felt like a Faustian bargain.  They
 make such great stuff, but they're such assholes.  Do I really want
 to support this company?
 * * *
 Should Apple care what people like me think?  What difference does
 it make if they alienate a small minority of their users?There are a couple reasons they should care.  One is that these
 users are the people they want as employees.  If your company seems
 evil, the best programmers won't work for you.  That hurt Microsoft
 a lot starting in the 90s.  Programmers started to feel sheepish
 about working there.  It seemed like selling out.  When people from
 Microsoft were talking to other programmers and they mentioned where
 they worked, there were a lot of self-deprecating jokes about having
 gone over to the dark side.  But the real problem for Microsoft
 wasn't the embarrassment of the people they hired.  It was the
 people they never got.  And you know who got them?  Google and
 Apple.  If Microsoft was the Empire, they were the Rebel Alliance.
 And it's largely because they got more of the best people that
 Google and Apple are doing so much better than Microsoft today.Why are programmers so fussy about their employers' morals?  Partly
 because they can afford to be.  The best programmers can work
 wherever they want.  They don't have to work for a company they
 have qualms about.But the other reason programmers are fussy, I think, is that evil
 begets stupidity.  An organization that wins by exercising power
 starts to lose the ability to win by doing better work.  And it's
 not fun for a smart person to work in a place where the best ideas
 aren't the ones that win.  I think the reason Google embraced "Don't
 be evil" so eagerly was not so much to impress the outside world
 as to inoculate themselves against arrogance.
 [1]
 That has worked for Google so far.  They've become more
 bureaucratic, but otherwise they seem to have held true to their
 original principles. With Apple that seems less the case.  When you
 look at the famous 
 1984 ad 
 now, it's easier to imagine Apple as the
 dictator on the screen than the woman with the hammer.
 [2]
 In fact, if you read the dictator's speech it sounds uncannily like a
 prophecy of the App Store.
  We have triumphed over the unprincipled dissemination of facts. We have created, for the first time in all history, a garden of
  pure ideology, where each worker may bloom secure from the pests
  of contradictory and confusing truths.
 The other reason Apple should care what programmers think of them
 is that when you sell a platform, developers make or break you.  If
 anyone should know this, Apple should.  VisiCalc made the Apple II.And programmers build applications for the platforms they use.  Most
 applications—most startups, probably—grow out of personal projects.
 Apple itself did.  Apple made microcomputers because that's what
 Steve Wozniak wanted for himself.  He couldn't have afforded a
 minicomputer. 
 [3]
 Microsoft likewise started out making interpreters
 for little microcomputers because
 Bill Gates and Paul Allen were interested in using them.  It's a
 rare startup that doesn't build something the founders use.The main reason there are so many iPhone apps is that so many programmers
 have iPhones.  They may know, because they read it in an article,
 that Blackberry has such and such market share.  But in practice
 it's as if RIM didn't exist. If they're going to build something,
 they want to be able to use it themselves, and that means building
 an iPhone app.So programmers continue to develop iPhone apps, even though Apple
 continues to maltreat them.  They're like someone stuck in an abusive
 relationship.  They're so attracted to the iPhone that they can't
 leave.  But they're looking for a way out.  One wrote:
  While I did enjoy developing for the iPhone, the control they
  place on the App Store does not give me the drive to develop
  applications as I would like. In fact I don't intend to make any
  more iPhone applications unless absolutely necessary.
 [4]
 Can anything break this cycle?  No device I've seen so far could.
 Palm and RIM haven't a hope.  The only credible contender is Android.
 But Android is an orphan; Google doesn't really care about it, not
 the way Apple cares about the iPhone.  Apple cares about the iPhone
 the way Google cares about search.
 * * *
 Is the future of handheld devices one locked down by Apple?  It's
 a worrying prospect.  It would be a bummer to have another grim
 monoculture like we had in the 1990s.  In 1995, writing software
 for end users was effectively identical with writing Windows
 applications.  Our horror at that prospect was the single biggest
 thing that drove us to start building web apps.At least we know now what it would take to break Apple's lock.
 You'd have to get iPhones out of programmers' hands.  If programmers
 used some other device for mobile web access, they'd start to develop
 apps for that instead.How could you make a device programmers liked better than the iPhone?
 It's unlikely you could make something better designed.  Apple
 leaves no room there.  So this alternative device probably couldn't
 win on general appeal.  It would have to win by virtue of some
 appeal it had to programmers specifically.One way to appeal to programmers is with software.  If you
 could think of an application programmers had to have, but that
 would be impossible in the circumscribed world of the iPhone, 
 you could presumably get them to switch.That would definitely happen if programmers started to use handhelds
 as development machines—if handhelds displaced laptops the
 way laptops displaced desktops.  You need more control of a development
 machine than Apple will let you have over an iPhone.Could anyone make a device that you'd carry around in your pocket
 like a phone, and yet would also work as a development machine?
 It's hard to imagine what it would look like.  But I've learned
 never to say never about technology.  A phone-sized device that
 would work as a development machine is no more miraculous by present
 standards than the iPhone itself would have seemed by the standards
 of 1995.My current development machine is a MacBook Air, which I use with
 an external monitor and keyboard in my office, and by itself when
 traveling.  If there was a version half the size I'd prefer it.
 That still wouldn't be small enough to carry around everywhere like
 a phone, but we're within a factor of 4 or so.  Surely that gap is
 bridgeable.  In fact, let's make it an
 RFS. Wanted: 
 Woman with hammer.
 Notes
 [1]
 When Google adopted "Don't be evil," they were still so small
 that no one would have expected them to be, yet.
 [2]
 The dictator in the 1984 ad isn't Microsoft, incidentally;
 it's IBM.  IBM seemed a lot more frightening in those days, but
 they were friendlier to developers than Apple is now.
 [3]
 He couldn't even afford a monitor.  That's why the Apple
 I used a TV as a monitor.
 [4]
 Several people I talked to mentioned how much they liked the
 iPhone SDK.  The problem is not Apple's products but their policies.
 Fortunately policies are software; Apple can change them instantly
 if they want to.  Handy that, isn't it?
 Thanks to Sam Altman, Trevor Blackwell, Ross Boucher, 
 James Bracy, Gabor Cselle,
 Patrick Collison, Jason Freedman, John Gruber, Joe Hewitt, Jessica Livingston,
 Robert Morris, Teng Siong Ong, Nikhil Pandit, Savraj Singh, and Jared Tame for reading drafts of this.
--- a/data/PaulGrahamEssaysLarge/avg.txt
+++ b/data/PaulGrahamEssaysLarge/avg.txt
@ -0,0 +1,375 @@
 Want to start a startup?  Get funded by
 Y Combinator.
 April 2001, rev. April 2003
 (This article is derived from a talk given at the 2001 Franz
 Developer Symposium.)
 In the summer of 1995, my friend Robert Morris and I
 started a startup called 
 Viaweb.  
 Our plan was to write
 software that would let end users build online stores.
 What was novel about this software, at the time, was
 that it ran on our server, using ordinary Web pages
 as the interface. A lot of people could have been having this idea at the
 same time, of course, but as far as I know, Viaweb was
 the first Web-based application.  It seemed such
 a novel idea to us that we named the company after it:
 Viaweb, because our software worked via the Web,
 instead of running on your desktop computer. Another unusual thing about this software was that it
 was written primarily in a programming language called
 Lisp. It was one of the first big end-user
 applications to be written in Lisp, which up till then
 had been used mostly in universities and research labs. [1]
 The Secret Weapon
 Eric Raymond has written an essay called "How to Become a Hacker,"
 and in it, among other things, he tells would-be hackers what
 languages they should learn.  He suggests starting with Python and
 Java, because they are easy to learn.  The serious hacker will also
 want to learn C, in order to hack Unix, and Perl for system
 administration and cgi scripts.  Finally, the truly serious hacker
 should consider learning Lisp:
  Lisp is worth learning for the profound enlightenment experience
  you will have when you finally get it; that experience will make
  you a better programmer for the rest of your days, even if you
  never actually use Lisp itself a lot.
 This is the same argument you tend to hear for learning Latin.  It
 won't get you a job, except perhaps as a classics professor, but
 it will improve your mind, and make you a better writer in languages
 you do want to use, like English.But wait a minute.  This metaphor doesn't stretch that far.  The
 reason Latin won't get you a job is that no one speaks it.  If you
 write in Latin, no one can understand you.  But Lisp is a computer
 language, and computers speak whatever language you, the programmer,
 tell them to.So if Lisp makes you a better programmer, like he says, why wouldn't
 you want to use it? If a painter were offered a brush that would
 make him a better painter, it seems to me that he would want to
 use it in all his paintings, wouldn't he? I'm not trying to make
 fun of Eric Raymond here.  On the whole, his advice is good.  What
 he says about Lisp is pretty much the conventional wisdom.  But
 there is a contradiction in the conventional wisdom:  Lisp will
 make you a better programmer, and yet you won't use it.Why not?  Programming languages are just tools, after all.  If Lisp
 really does yield better programs, you should use it.  And if it
 doesn't, then who needs it?This is not just a theoretical question.  Software is a very
 competitive business, prone to natural monopolies.  A company that
 gets software written faster and better will, all other things
 being equal, put its competitors out of business.  And when you're
 starting a startup, you feel this very keenly.  Startups tend to
 be an all or nothing proposition.  You either get rich, or you get
 nothing.  In a startup, if you bet on the wrong technology, your
 competitors will crush you.Robert and I both knew Lisp well, and we couldn't see any reason
 not to trust our instincts and go with Lisp.  We knew that everyone
 else was writing their software in C++ or Perl.  But we also knew
 that that didn't mean anything.  If you chose technology that way,
 you'd be running Windows.  When you choose technology, you have to
 ignore what other people are doing, and consider only what will
 work the best.This is especially true in a startup.  In a big company, you can
 do what all the other big companies are doing.  But a startup can't
 do what all the other startups do.  I don't think a lot of people
 realize this, even in startups.The average big company grows at about ten percent a year.  So if
 you're running a big company and you do everything the way the
 average big company does it, you can expect to do as well as the
 average big company-- that is, to grow about ten percent a year.The same thing will happen if you're running a startup, of course.
 If you do everything the way the average startup does it, you should
 expect average performance.  The problem here is, average performance
 means that you'll go out of business.  The survival rate for startups
 is way less than fifty percent.  So if you're running a startup,
 you had better be doing something odd.  If not, you're in trouble.Back in 1995, we knew something that I don't think our competitors
 understood, and few understand even now:  when you're writing
 software that only has to run on your own servers, you can use
 any language you want.  When you're writing desktop software,
 there's a strong bias toward writing applications in the same
 language as the operating system.  Ten years ago, writing applications
 meant writing applications in C.  But with Web-based software,
 especially when you have the source code of both the language and
 the operating system, you can use whatever language you want.This new freedom is a double-edged sword, however.  Now that you
 can use any language, you have to think about which one to use.
 Companies that try to pretend nothing has changed risk finding that
 their competitors do not.If you can use any language, which do you use?  We chose Lisp.
 For one thing, it was obvious that rapid development would be
 important in this market.  We were all starting from scratch, so
 a company that could get new features done before its competitors
 would have a big advantage.  We knew Lisp was a really good language
 for writing software quickly, and server-based applications magnify
 the effect of rapid development, because you can release software
 the minute it's done.If other companies didn't want to use Lisp, so much the better.
 It might give us a technological edge, and we needed all the help
 we could get.  When we started Viaweb, we had no experience in
 business.  We didn't know anything about marketing, or hiring
 people, or raising money, or getting customers.  Neither of us had
 ever even had what you would call a real job.  The only thing we
 were good at was writing software.  We hoped that would save us.
 Any advantage we could get in the software department, we would
 take.So you could say that using Lisp was an experiment.  Our hypothesis
 was that if we wrote our software in Lisp, we'd be able to get
 features done faster than our competitors, and also to do things
 in our software that they couldn't do.  And because Lisp was so
 high-level, we wouldn't need a big development team, so our costs
 would be lower.  If this were so, we could offer a better product
 for less money, and still make a profit.  We would end up getting
 all the users, and our competitors would get none, and eventually
 go out of business.  That was what we hoped would happen, anyway.What were the results of this experiment?  Somewhat surprisingly,
 it worked.  We eventually had many competitors, on the order of
 twenty to thirty of them, but none of their software could compete
 with ours.  We had a wysiwyg online store builder that ran on the
 server and yet felt like a desktop application.  Our competitors
 had cgi scripts.  And we were always far ahead of them in features.
 Sometimes, in desperation, competitors would try to introduce
 features that we didn't have.  But with Lisp our development cycle
 was so fast that we could sometimes duplicate a new feature within
 a day or two of a competitor announcing it in a press release.  By
 the time journalists covering the press release got round to calling
 us, we would have the new feature too.It must have seemed to our competitors that we had some kind of
 secret weapon-- that we were decoding their Enigma traffic or
 something.  In fact we did have a secret weapon, but it was simpler
 than they realized.  No one was leaking news of their features to
 us.   We were just able to develop software faster than anyone
 thought possible.When I was about nine I happened to get hold of a copy of The Day
 of the Jackal, by Frederick Forsyth.  The main character is an
 assassin who is hired to kill the president of France.  The assassin
 has to get past the police to get up to an apartment that overlooks
 the president's route.  He walks right by them, dressed up as an
 old man on crutches, and they never suspect him.Our secret weapon was similar.  We wrote our software in a weird
 AI language, with a bizarre syntax full of parentheses.  For years
 it had annoyed me to hear Lisp described that way.  But now it
 worked to our advantage.  In business, there is nothing more valuable
 than a technical advantage your competitors don't understand.  In
 business, as in war, surprise is worth as much as force.And so, I'm a little embarrassed to say, I never said anything
 publicly about Lisp while we were working on Viaweb.  We never
 mentioned it to the press, and if you searched for Lisp on our Web
 site, all you'd find were the titles of two books in my bio.  This
 was no accident.  A startup should give its competitors as little
 information as possible.  If they didn't know what language our
 software was written in, or didn't care, I wanted to keep it that
 way. [2]
 The people who understood our technology best were the customers.
 They didn't care what language Viaweb was written in either, but
 they noticed that it worked really well.  It let them build great
 looking online stores literally in minutes.  And so, by word of
 mouth mostly, we got more and more users.  By the end of 1996 we
 had about 70 stores online.  At the end of 1997 we had 500.  Six
 months later, when Yahoo bought us, we had 1070 users.  Today, as
 Yahoo Store, this software continues to dominate its market.  It's
 one of the more profitable pieces of Yahoo, and the stores built
 with it are the foundation of Yahoo Shopping.  I left Yahoo in
 1999, so I don't know exactly how many users they have now, but
 the last I heard there were about 20,000.
 The Blub Paradox
 What's so great about Lisp?  And if Lisp is so great, why doesn't
 everyone use it?  These sound like rhetorical questions, but actually
 they have straightforward answers.  Lisp is so great not because
 of some magic quality visible only to devotees, but because it is
 simply the most powerful language available.  And the reason everyone
 doesn't use it is that programming languages are not merely
 technologies, but habits of mind as well, and nothing changes
 slower.  Of course, both these answers need explaining.I'll begin with a shockingly controversial statement:  programming
 languages vary in power.Few would dispute, at least, that high level languages are more
 powerful than machine language.  Most programmers today would agree
 that you do not, ordinarily, want to program in machine language.
 Instead, you should program in a high-level language, and have a
 compiler translate it into machine language for you.  This idea is
 even built into the hardware now: since the 1980s, instruction sets
 have been designed for compilers rather than human programmers.Everyone knows it's a mistake to write your whole program by hand
 in machine language.  What's less often understood is that there
 is a more general principle here: that if you have a choice of
 several languages, it is, all other things being equal, a mistake
 to program in anything but the most powerful one. [3]There are many exceptions to this rule.  If you're writing a program
 that has to work very closely with a program written in a certain
 language, it might be a good idea to write the new program in the
 same language.  If you're writing a program that only has to do
 something very simple, like number crunching or bit manipulation,
 you may as well use a less abstract language, especially since it
 may be slightly faster.  And if you're writing a short, throwaway
 program, you may be better off just using whatever language has
 the best library functions for the task.  But in general, for
 application software, you want to be using the most powerful
 (reasonably efficient) language you can get, and using anything
 else is a mistake, of exactly the same kind, though possibly in a
 lesser degree, as programming in machine language.You can see that machine language is very low level.  But, at least
 as a kind of social convention, high-level languages are often all
 treated as equivalent.  They're not.  Technically the term "high-level
 language" doesn't mean anything very definite.  There's no dividing
 line with machine languages on one side and all the high-level
 languages on the other.  Languages fall along a continuum [4] of
 abstractness, from the most powerful all the way down to machine
 languages, which themselves vary in power.Consider Cobol.  Cobol is a high-level language, in the sense that
 it gets compiled into machine language.  Would anyone seriously
 argue that Cobol is equivalent in power to, say, Python?  It's
 probably closer to machine language than Python.Or how about Perl 4?  Between Perl 4 and Perl 5, lexical closures
 got added to the language.  Most Perl hackers would agree that Perl
 5 is more powerful than Perl 4.  But once you've admitted that,
 you've admitted that one high level language can be more powerful
 than another.  And it follows inexorably that, except in special
 cases, you ought to use the most powerful you can get.This idea is rarely followed to its conclusion, though.  After a
 certain age, programmers rarely switch languages voluntarily.
 Whatever language people happen to be used to, they tend to consider
 just good enough.Programmers get very attached to their favorite languages, and I
 don't want to hurt anyone's feelings, so to explain this point I'm
 going to use a hypothetical language called Blub.  Blub falls right
 in the middle of the abstractness continuum.  It is not the most
 powerful language, but it is more powerful than Cobol or machine
 language.And in fact, our hypothetical Blub programmer wouldn't use either
 of them.  Of course he wouldn't program in machine language.  That's
 what compilers are for.  And as for Cobol, he doesn't know how
 anyone can get anything done with it.  It doesn't even have x (Blub
 feature of your choice).As long as our hypothetical Blub programmer is looking down the
 power continuum, he knows he's looking down.  Languages less powerful
 than Blub are obviously less powerful, because they're missing some
 feature he's used to.  But when our hypothetical Blub programmer
 looks in the other direction, up the power continuum, he doesn't
 realize he's looking up.  What he sees are merely weird languages.
 He probably considers them about equivalent in power to Blub, but
 with all this other hairy stuff thrown in as well.  Blub is good
 enough for him, because he thinks in Blub.When we switch to the point of view of a programmer using any of
 the languages higher up the power continuum, however, we find that
 he in turn looks down upon Blub.  How can you get anything done in
 Blub? It doesn't even have y.By induction, the only programmers in a position to see all the
 differences in power between the various languages are those who
 understand the most powerful one.  (This is probably what Eric
 Raymond meant about Lisp making you a better programmer.) You can't
 trust the opinions of the others, because of the Blub paradox:
 they're satisfied with whatever language they happen to use, because
 it dictates the way they think about programs.I know this from my own experience, as a high school kid writing
 programs in Basic.  That language didn't even support recursion.
 It's hard to imagine writing programs without using recursion, but
 I didn't miss it at the time.  I thought in Basic.  And I was a
 whiz at it.  Master of all I surveyed.The five languages that Eric Raymond recommends to hackers fall at
 various points on the power continuum.  Where they fall relative
 to one another is a sensitive topic.  What I will say is that I
 think Lisp is at the top.  And to support this claim I'll tell you
 about one of the things I find missing when I look at the other
 four languages.  How can you get anything done in them, I think,
 without macros? [5]Many languages have something called a macro.  But Lisp macros are
 unique.  And believe it or not, what they do is related to the
 parentheses.  The designers of Lisp didn't put all those parentheses
 in the language just to be different.  To the Blub programmer, Lisp
 code looks weird.  But those parentheses are there for a reason.
 They are the outward evidence of a fundamental difference between
 Lisp and other languages.Lisp code is made out of Lisp data objects.  And not in the trivial
 sense that the source files contain characters, and strings are
 one of the data types supported by the language.  Lisp code, after
 it's read by the parser, is made of data structures that you can
 traverse.If you understand how compilers work, what's really going on is
 not so much that Lisp has a strange syntax as that Lisp has no
 syntax.  You write programs in the parse trees that get generated
 within the compiler when other languages are parsed.  But these
 parse trees are fully accessible to your programs.  You can write
 programs that manipulate them.  In Lisp, these programs are called
 macros.  They are programs that write programs.Programs that write programs?  When would you ever want to do that?
 Not very often, if you think in Cobol.  All the time, if you think
 in Lisp.  It would be convenient here if I could give an example
 of a powerful macro, and say there! how about that?  But if I did,
 it would just look like gibberish to someone who didn't know Lisp;
 there isn't room here to explain everything you'd need to know to
 understand what it meant.  In 
 Ansi Common Lisp I tried to move
 things along as fast as I could, and even so I didn't get to macros
 until page 160.But I think I can give a kind of argument that might be convincing.
 The source code of the Viaweb editor was probably about 20-25%
 macros.  Macros are harder to write than ordinary Lisp functions,
 and it's considered to be bad style to use them when they're not
 necessary.  So every macro in that code is there because it has to
 be.  What that means is that at least 20-25% of the code in this
 program is doing things that you can't easily do in any other
 language.  However skeptical the Blub programmer might be about my
 claims for the mysterious powers of Lisp, this ought to make him
 curious.  We weren't writing this code for our own amusement.  We
 were a tiny startup, programming as hard as we could in order to
 put technical barriers between us and our competitors.A suspicious person might begin to wonder if there was some
 correlation here.  A big chunk of our code was doing things that
 are very hard to do in other languages.  The resulting software
 did things our competitors' software couldn't do.  Maybe there was
 some kind of connection.  I encourage you to follow that thread.
 There may be more to that old man hobbling along on his crutches
 than meets the eye.

 Aikido for Startups

 But I don't expect to convince anyone (over 25) to go out and learn
 Lisp.  The purpose of this article is not to change anyone's mind,
 but to reassure people already interested in using Lisp-- people
 who know that Lisp is a powerful language, but worry because it
 isn't widely used.  In a competitive situation, that's an advantage.
 Lisp's power is multiplied by the fact that your competitors don't
 get it.If you think of using Lisp in a startup, you shouldn't worry that
 it isn't widely understood.  You should hope that it stays that
 way. And it's likely to.  It's the nature of programming languages
 to make most people satisfied with whatever they currently use.
 Computer hardware changes so much faster than personal habits that
 programming practice is usually ten to twenty years behind the
 processor.  At places like MIT they were writing programs in
 high-level languages in the early 1960s, but many companies continued
 to write code in machine language well into the 1980s.  I bet a
 lot of people continued to write machine language until the processor,
 like a bartender eager to close up and go home, finally kicked them
 out by switching to a RISC instruction set.

 Ordinarily technology changes fast.  But programming languages are
 different: programming languages are not just technology, but what
 programmers think in.  They're half technology and half religion.[6]
 And so the median language, meaning whatever language the median
 programmer uses, moves as slow as an iceberg.  Garbage collection,
 introduced by Lisp in about 1960, is now widely considered to be
 a good thing.  Runtime typing, ditto, is growing in popularity.
 Lexical closures, introduced by Lisp in the early 1970s, are now,
 just barely, on the radar screen.  Macros, introduced by Lisp in the
 mid 1960s, are still terra incognita.Obviously, the median language has enormous momentum.  I'm not
 proposing that you can fight this powerful force.  What I'm proposing
 is exactly the opposite: that, like a practitioner of Aikido, you
 can use it against your opponents.If you work for a big company, this may not be easy.  You will have
 a hard time convincing the pointy-haired boss to let you build
 things in Lisp, when he has just read in the paper that some other
 language is poised, like Ada was twenty years ago, to take over
 the world.  But if you work for a startup that doesn't have
 pointy-haired bosses yet, you can, like we did, turn the Blub
 paradox to your advantage:  you can use technology that your
 competitors, glued immovably to the median language, will never be
 able to match.If you ever do find yourself working for a startup, here's a handy
 tip for evaluating competitors.  Read their job listings.  Everything
 else on their site may be stock photos or the prose equivalent,
 but the job listings have to be specific about what they want, or
 they'll get the wrong candidates.During the years we worked on Viaweb I read a lot of job descriptions.
 A new competitor seemed to emerge out of the woodwork every month
 or so.  The first thing I would do, after checking to see if they
 had a live online demo, was look at their job listings.  After a
 couple years of this I could tell which companies to worry about
 and which not to.  The more of an IT flavor the job descriptions
 had, the less dangerous the company was.  The safest kind were the
 ones that wanted Oracle experience.  You never had to worry about
 those.  You were also safe if they said they wanted C++ or Java
 developers.  If they wanted Perl or Python programmers, that would
 be a bit frightening-- that's starting to sound like a company
 where the technical side, at least, is run by real hackers.  If I
 had ever seen a job posting looking for Lisp hackers, I would have
 been really worried.
 Notes

 [1] Viaweb at first had two parts: the editor, written in Lisp,
 which people used to build their sites, and the ordering system,
 written in C, which handled orders.  The first version was mostly
 Lisp, because the ordering system was small.  Later we added two
 more modules, an image generator written in C, and a back-office
 manager written mostly in Perl.

 In January 2003, Yahoo released a new version of the editor
 written in C++ and Perl.  It's hard to say whether the program is no
 longer written in Lisp, though, because to translate this program
 into C++ they literally had to write a Lisp interpreter: the source
 files of all the page-generating templates are still, as far as I
 know,  Lisp code.  (See Greenspun's Tenth Rule.)

 [2] Robert Morris says that I didn't need to be secretive, because
 even if our competitors had known we were using Lisp, they wouldn't
 have understood why:  "If they were that smart they'd already be
 programming in Lisp."

 [3] All languages are equally powerful in the sense of being Turing
 equivalent, but that's not the sense of the word programmers care
 about. (No one wants to program a Turing machine.)  The kind of
 power programmers care about may not be formally definable, but
 one way to explain it would be to say that it refers to features
 you could only get in the less powerful language by writing an
 interpreter for the more powerful language in it. If language A
 has an operator for removing spaces from strings and language B
 doesn't, that probably doesn't make A more powerful, because you
 can probably write a subroutine to do it in B.  But if A supports,
 say, recursion, and B doesn't, that's not likely to be something
 you can fix by writing library functions.

 [4] Note to nerds: or possibly a lattice, narrowing toward the top;
 it's not the shape that matters here but the idea that there is at
 least a partial order.

 [5] It is a bit misleading to treat macros as a separate feature.
 In practice their usefulness is greatly enhanced by other Lisp
 features like lexical closures and rest parameters.

 [6] As a result, comparisons of programming languages either take
 the form of religious wars or undergraduate textbooks so determinedly
 neutral that they're really works of anthropology.  People who
 value their peace, or want tenure, avoid the topic.  But the question
 is only half a religious one; there is something there worth
 studying, especially if you want to design new languages.
--- a/data/PaulGrahamEssaysLarge/before.txt
+++ b/data/PaulGrahamEssaysLarge/before.txt
@ -0,0 +1,387 @@
 Want to start a startup?  Get funded by
 Y Combinator.
 October 2014

 (This essay is derived from a guest lecture in Sam Altman's startup class at
 Stanford.  It's intended for college students, but much of it is
 applicable to potential founders at other ages.)One of the advantages of having kids is that when you have to give
 advice, you can ask yourself "what would I tell my own kids?"  My
 kids are little, but I can imagine what I'd tell them about startups
 if they were in college, and that's what I'm going to tell you.Startups are very counterintuitive.  I'm not sure why.  Maybe it's
 just because knowledge about them hasn't permeated our culture yet.
 But whatever the reason, starting a startup is a task where you
 can't always trust your instincts.It's like skiing in that way.  When you first try skiing and you
 want to slow down, your instinct is to lean back.  But if you lean
 back on skis you fly down the hill out of control.  So part of
 learning to ski is learning to suppress that impulse.  Eventually
 you get new habits, but at first it takes a conscious effort.  At
 first there's a list of things you're trying to remember as you
 start down the hill.Startups are as unnatural as skiing, so there's a similar list for
 startups. Here I'm going to give you the first part of it — the things
 to remember if you want to prepare yourself to start a startup.
 Counterintuitive

 The first item on it is the fact I already mentioned: that startups
 are so weird that if you trust your instincts, you'll make a lot
 of mistakes.  If you know nothing more than this, you may at least
 pause before making them.When I was running Y Combinator I used to joke that our function
 was to tell founders things they would ignore.  It's really true.
 Batch after batch, the YC partners warn founders about mistakes
 they're about to make, and the founders ignore them, and then come
 back a year later and say "I wish we'd listened."Why do the founders ignore the partners' advice?  Well, that's the
 thing about counterintuitive ideas: they contradict your intuitions.
 They seem wrong.  So of course your first impulse is to disregard
 them.  And in fact my joking description is not merely the curse
 of Y Combinator but part of its raison d'etre. If founders' instincts
 already gave them the right answers, they wouldn't need us.  You
 only need other people to give you advice that surprises you. That's
 why there are a lot of ski instructors and not many running
 instructors. [1]

 You can, however, trust your instincts about people.  And in fact
 one of the most common mistakes young founders make is not to
 do that enough.  They get involved with people who seem impressive,
 but about whom they feel some misgivings personally.  Later when
 things blow up they say "I knew there was something off about him,
 but I ignored it because he seemed so impressive."If you're thinking about getting involved with someone — as a
 cofounder, an employee, an investor, or an acquirer — and you
 have misgivings about them, trust your gut.  If someone seems
 slippery, or bogus, or a jerk, don't ignore it.This is one case where it pays to be self-indulgent. Work with
 people you genuinely like, and you've known long enough to be sure.
 Expertise

 The second counterintuitive point is that it's not that important
 to know a lot about startups.  The way to succeed in a startup is
 not to be an expert on startups, but to be an expert on your users
 and the problem you're solving for them.
 Mark Zuckerberg didn't succeed because he was an expert on startups.
 He succeeded despite being a complete noob at startups, because he
 understood his users really well.If you don't know anything about, say, how to raise an angel round,
 don't feel bad on that account.  That sort of thing you can learn
 when you need to, and forget after you've done it.In fact, I worry it's not merely unnecessary to learn in great
 detail about the mechanics of startups, but possibly somewhat
 dangerous.  If I met an undergrad who knew all about convertible
 notes and employee agreements and (God forbid) class FF stock, I
 wouldn't think "here is someone who is way ahead of their peers."
 It would set off alarms.  Because another of the characteristic
 mistakes of young founders is to go through the motions of starting
 a startup.  They make up some plausible-sounding idea, raise money
 at a good valuation, rent a cool office, hire a bunch of people.
 From the outside that seems like what startups do.  But the next
 step after rent a cool office and hire a bunch of people is: gradually
 realize how completely fucked they are, because while imitating all
 the outward forms of a startup they have neglected the one thing
 that's actually essential: making something people want.
 Game

 We saw this happen so often that we made up a name for it: playing
 house.  Eventually I realized why it was happening.  The reason
 young founders go through the motions of starting a startup is
 because that's what they've been trained to do for their whole lives
 up to that point.  Think about what you have to do to get into
 college, for example.  Extracurricular activities, check.  Even in
 college classes most of the work is as artificial as running laps.I'm not attacking the educational system for being this way. There
 will always be a certain amount of fakeness in the work you do when
 you're being taught something, and if you measure their performance
 it's inevitable that people will exploit the difference to the point
 where much of what you're measuring is artifacts of the fakeness.I confess I did it myself in college. I found that in a lot of
 classes there might only be 20 or 30 ideas that were the right shape
 to make good exam questions.  The way I studied for exams in these
 classes was not (except incidentally) to master the material taught
 in the class, but to make a list of potential exam questions and
 work out the answers in advance. When I walked into the final, the
 main thing I'd be feeling was curiosity about which of my questions
 would turn up on the exam.  It was like a game.It's not surprising that after being trained for their whole lives
 to play such games, young founders' first impulse on starting a
 startup is to try to figure out the tricks for winning at this new
 game. Since fundraising appears to be the measure of success for
 startups (another classic noob mistake), they always want to know what the
 tricks are for convincing investors.  We tell them the best way to
 convince investors is to make a startup
 that's actually doing well, meaning growing fast, and then simply
 tell investors so.  Then they want to know what the tricks are for
 growing fast.  And we have to tell them the best way to do that is
 simply to make something people want.So many of the conversations YC partners have with young founders
 begin with the founder asking "How do we..." and the partner replying
 "Just..."Why do the founders always make things so complicated?  The reason,
 I realized, is that they're looking for the trick.So this is the third counterintuitive thing to remember about
 startups: starting a startup is where gaming the system stops
 working.  Gaming the system may continue to work if you go to work
 for a big company. Depending on how broken the company is, you can
 succeed by sucking up to the right people, giving the impression
 of productivity, and so on. 
 [2]
 But that doesn't work with startups.
 There is no boss to trick, only users, and all users care about is
 whether your product does what they want. Startups are as impersonal
 as physics.  You have to make something people want, and you prosper
 only to the extent you do.The dangerous thing is, faking does work to some degree on investors.
 If you're super good at sounding like you know what you're talking
 about, you can fool investors for at least one and perhaps even two
 rounds of funding.  But it's not in your interest to.  The company
 is ultimately doomed.  All you're doing is wasting your own time
 riding it down.So stop looking for the trick. There are tricks in startups, as
 there are in any domain, but they are an order of magnitude less
 important than solving the real problem. A founder who knows nothing
 about fundraising but has made something users love will have an
 easier time raising money than one who knows every trick in the
 book but has a flat usage graph. And more importantly, the founder
 who has made something users love is the one who will go on to
 succeed after raising the money.Though in a sense it's bad news in that you're deprived of one of
 your most powerful weapons, I think it's exciting that gaming the
 system stops working when you start a startup.  It's exciting that
 there even exist parts of the world where you win by doing good
 work.  Imagine how depressing the world would be if it were all
 like school and big companies, where you either have to spend a lot
 of time on bullshit things or lose to people who do.
 [3]
 I would
 have been delighted if I'd realized in college that there were parts
 of the real world where gaming the system mattered less than others,
 and a few where it hardly mattered at all.  But there are, and this
 variation is one of the most important things to consider when
 you're thinking about your future.  How do you win in each type of
 work, and what would you like to win by doing?
 [4]
 All-Consuming

 That brings us to our fourth counterintuitive point: startups are
 all-consuming.  If you start a startup, it will take over your life
 to a degree you cannot imagine.  And if your startup succeeds, it
 will take over your life for a long time: for several years at the
 very least, maybe for a decade, maybe for the rest of your working
 life.  So there is a real opportunity cost here.Larry Page may seem to have an enviable life, but there are aspects
 of it that are unenviable.  Basically at 25 he started running as
 fast as he could and it must seem to him that he hasn't stopped to
 catch his breath since.  Every day new shit happens in the Google
 empire that only the CEO can deal with, and he, as CEO, has to deal
 with it.  If he goes on vacation for even a week, a whole week's
 backlog of shit accumulates.  And he has to bear this uncomplainingly,
 partly because as the company's daddy he can never show fear or
 weakness, and partly because billionaires get less than zero sympathy
 if they talk about having difficult lives.  Which has the strange
 side effect that the difficulty of being a successful startup founder
 is concealed from almost everyone except those who've done it.Y Combinator has now funded several companies that can be called
 big successes, and in every single case the founders say the same
 thing.  It never gets any easier.  The nature of the problems changes.
 You're worrying about construction delays at your London office
 instead of the broken air conditioner in your studio apartment.
 But the total volume of worry never decreases; if anything it
 increases.Starting a successful startup is similar to having kids in that
 it's like a button you push that changes your life irrevocably.
 And while it's truly wonderful having kids, there are a lot of
 things that are easier to do before you have them than after.  Many
 of which will make you a better parent when you do have kids. And
 since you can delay pushing the button for a while, most people in
 rich countries do.Yet when it comes to startups, a lot of people seem to think they're
 supposed to start them while they're still in college.  Are you
 crazy?  And what are the universities thinking?  They go out of
 their way to ensure their students are well supplied with contraceptives,
 and yet they're setting up entrepreneurship programs and startup
 incubators left and right.To be fair, the universities have their hand forced here.  A lot
 of incoming students are interested in startups.  Universities are,
 at least de facto, expected to prepare them for their careers.  So
 students who want to start startups hope universities can teach
 them about startups.  And whether universities can do this or not,
 there's some pressure to claim they can, lest they lose applicants
 to other universities that do.Can universities teach students about startups?  Yes and no.  They
 can teach students about startups, but as I explained before, this
 is not what you need to know.  What you need to learn about are the
 needs of your own users, and you can't do that until you actually
 start the company.
 [5]
 So starting a startup is intrinsically
 something you can only really learn by doing it.  And it's impossible
 to do that in college, for the reason I just explained: startups
 take over your life.  You can't start a startup for real as a
 student, because if you start a startup for real you're not a student
 anymore. You may be nominally a student for a bit, but you won't even
 be that for long.
 [6]Given this dichotomy, which of the two paths should you take?  Be
 a real student and not start a startup, or start a real startup and
 not be a student?  I can answer that one for you. Do not start a
 startup in college.  How to start a startup is just a subset of a
 bigger problem you're trying to solve: how to have a good life.
 And though starting a startup can be part of a good life for a lot
 of ambitious people, age 20 is not the optimal time to do it.
 Starting a startup is like a brutally fast depth-first search.  Most
 people should still be searching breadth-first at 20.You can do things in your early 20s that you can't do as well before
 or after, like plunge deeply into projects on a whim and travel
 super cheaply with no sense of a deadline.  For unambitious people,
 this sort of thing is the dreaded "failure to launch," but for the
 ambitious ones it can be an incomparably valuable sort of exploration.
 If you start a startup at 20 and you're sufficiently successful,
 you'll never get to do it.
 [7]Mark Zuckerberg will never get to bum around a foreign country.  He
 can do other things most people can't, like charter jets to fly him
 to foreign countries. But success has taken a lot of the serendipity
 out of his life. Facebook is running him as much as he's running
 Facebook. And while it can be very cool to be in the grip of a
 project you consider your life's work, there are advantages to
 serendipity too, especially early in life.  Among other things it
 gives you more options to choose your life's work from.There's not even a tradeoff here. You're not sacrificing anything
 if you forgo starting a startup at 20, because you're more likely
 to succeed if you wait.  In the unlikely case that you're 20 and
 one of your side projects takes off like Facebook did, you'll face
 a choice of running with it or not, and it may be reasonable to run
 with it.  But the usual way startups take off is for the founders
 to make them take off, and it's gratuitously
 stupid to do that at 20.
 Try

 Should you do it at any age?  I realize I've made startups sound
 pretty hard.  If I haven't, let me try again: starting a startup
 is really hard.  What if it's too hard?  How can you tell if you're
 up to this challenge?The answer is the fifth counterintuitive point: you can't tell. Your
 life so far may have given you some idea what your prospects might
 be if you tried to become a mathematician, or a professional football
 player.  But unless you've had a very strange life you haven't done
 much that was like being a startup founder.
 Starting a startup will change you a lot.  So what you're trying
 to estimate is not just what you are, but what you could grow into,
 and who can do that?For the past 9 years it was my job to predict whether people would
 have what it took to start successful startups.  It was easy to
 tell how smart they were, and most people reading this will be over
 that threshold.  The hard part was predicting how tough and ambitious they would become.  There
 may be no one who has more experience at trying to predict that,
 so I can tell you how much an expert can know about it, and the
 answer is: not much.  I learned to keep a completely open mind about
 which of the startups in each batch would turn out to be the stars.The founders sometimes think they know. Some arrive feeling sure
 they will ace Y Combinator just as they've aced every one of the (few,
 artificial, easy) tests they've faced in life so far.  Others arrive
 wondering how they got in, and hoping YC doesn't discover whatever
 mistake caused it to accept them.  But there is little correlation
 between founders' initial attitudes and how well their companies
 do.I've read that the same is true in the military — that the
 swaggering recruits are no more likely to turn out to be really
 tough than the quiet ones. And probably for the same reason: that
 the tests involved are so different from the ones in their previous
 lives.If you're absolutely terrified of starting a startup, you probably
 shouldn't do it.  But if you're merely unsure whether you're up to
 it, the only way to find out is to try.  Just not now.
 Ideas

 So if you want to start a startup one day, what should you do in
 college?  There are only two things you need initially: an idea and
 cofounders.  And the m.o. for getting both is the same.  Which leads
 to our sixth and last counterintuitive point: that the way to get
 startup ideas is not to try to think of startup ideas.I've written a whole essay on this,
 so I won't repeat it all here.  But the short version is that if
 you make a conscious effort to think of startup ideas, the ideas
 you come up with will not merely be bad, but bad and plausible-sounding,
 meaning you'll waste a lot of time on them before realizing they're
 bad.The way to come up with good startup ideas is to take a step back.
 Instead of making a conscious effort to think of startup ideas,
 turn your mind into the type that startup ideas form in without any
 conscious effort.  In fact, so unconsciously that you don't even
 realize at first that they're startup ideas.This is not only possible, it's how Apple, Yahoo, Google, and
 Facebook all got started.  None of these companies were even meant
 to be companies at first.  They were all just side projects.  The
 best startups almost have to start as side projects, because great
 ideas tend to be such outliers that your conscious mind would reject
 them as ideas for companies.Ok, so how do you turn your mind into the type that startup ideas
 form in unconsciously?  (1) Learn a lot about things that matter,
 then (2) work on problems that interest you (3) with people you
 like and respect.  The third part, incidentally, is how you get
 cofounders at the same time as the idea.The first time I wrote that paragraph, instead of "learn a lot about
 things that matter," I wrote "become good at some technology." But
 that prescription, though sufficient, is too narrow.  What was
 special about Brian Chesky and Joe Gebbia was not that they were
 experts in technology.  They were good at design, and perhaps even
 more importantly, they were good at organizing groups and making
 projects happen.  So you don't have to work on technology per se,
 so long as you work on problems demanding enough to stretch you.What kind of problems are those?  That is very hard to answer in
 the general case.  History is full of examples of young people who
 were working on important problems that no
 one else at the time thought were important, and in particular
 that their parents didn't think were important.  On the other hand,
 history is even fuller of examples of parents who thought their
 kids were wasting their time and who were right.  So how do you
 know when you're working on real stuff?
 [8]I know how I know.  Real problems are interesting, and I am
 self-indulgent in the sense that I always want to work on interesting
 things, even if no one else cares about them (in fact, especially
 if no one else cares about them), and find it very hard to make
 myself work on boring things, even if they're supposed to be
 important.My life is full of case after case where I worked on something just
 because it seemed interesting, and it turned out later to be useful
 in some worldly way.  Y
 Combinator itself was something I only did because it seemed
 interesting. So I seem to have some sort of internal compass that
 helps me out.  But I don't know what other people have in their
 heads. Maybe if I think more about this I can come up with heuristics
 for recognizing genuinely interesting problems, but for the moment
 the best I can offer is the hopelessly question-begging advice that
 if you have a taste for genuinely interesting problems, indulging
 it energetically is the best way to prepare yourself for a startup.
 And indeed, probably also the best way to live.
 [9]But although I can't explain in the general case what counts as an
 interesting problem, I can tell you about a large subset of them.
 If you think of technology as something that's spreading like a
 sort of fractal stain, every moving point on the edge represents
 an interesting problem.  So one guaranteed way to turn your mind
 into the type that has good startup ideas is to get yourself to the
 leading edge of some technology — to cause yourself, as Paul
 Buchheit put it, to "live in the future." When you reach that point,
 ideas that will seem to other people uncannily prescient will seem
 obvious to you.  You may not realize they're startup ideas, but
 you'll know they're something that ought to exist.For example, back at Harvard in the mid 90s a fellow grad student
 of my friends Robert and Trevor wrote his own voice over IP software.
 He didn't mean it to be a startup, and he never tried to turn it
 into one.  He just wanted to talk to his girlfriend in Taiwan without
 paying for long distance calls, and since he was an expert on
 networks it seemed obvious to him that the way to do it was turn
 the sound into packets and ship it over the Internet. He never did
 any more with his software than talk to his girlfriend, but this
 is exactly the way the best startups get started.So strangely enough the optimal thing to do in college if you want
 to be a successful startup founder is not some sort of new, vocational
 version of college focused on "entrepreneurship." It's the classic
 version of college as education for its own sake. If you want to
 start a startup after college, what you should do in college is
 learn powerful things.  And if you have genuine intellectual
 curiosity, that's what you'll naturally tend to do if you just
 follow your own inclinations.
 [10]The component of entrepreneurship that really matters is domain
 expertise.  The way to become Larry Page was to become an expert
 on search. And the way to become an expert on search was to be
 driven by genuine curiosity, not some ulterior motive.At its best, starting a startup is merely an ulterior motive for
 curiosity.  And you'll do it best if you introduce the ulterior
 motive toward the end of the process.So here is the ultimate advice for young would-be startup founders,
 boiled down to two words: just learn.
 Notes

 [1]
 Some founders listen more than others, and this tends to be a
 predictor of success. One of the things I
 remember about the Airbnbs during YC is how intently they listened.[2]
 In fact, this is one of the reasons startups are possible.  If
 big companies weren't plagued by internal inefficiencies, they'd
 be proportionately more effective, leaving less room for startups.[3]
 In a startup you have to spend a lot of time on schleps, but this sort of work is merely
 unglamorous, not bogus.[4]
 What should you do if your true calling is gaming the system?
 Management consulting.[5]
 The company may not be incorporated, but if you start to get
 significant numbers of users, you've started it, whether you realize
 it yet or not.[6]
 It shouldn't be that surprising that colleges can't teach
 students how to be good startup founders, because they can't teach
 them how to be good employees either.The way universities "teach" students how to be employees is to
 hand off the task to companies via internship programs.  But you
 couldn't do the equivalent thing for startups, because by definition
 if the students did well they would never come back.[7]
 Charles Darwin was 22 when he received an invitation to travel
 aboard the HMS Beagle as a naturalist.  It was only because he was
 otherwise unoccupied, to a degree that alarmed his family, that he
 could accept it. And yet if he hadn't we probably would not know
 his name.[8]
 Parents can sometimes be especially conservative in this
 department.  There are some whose definition of important problems
 includes only those on the critical path to med school.[9]
 I did manage to think of a heuristic for detecting whether you
 have a taste for interesting ideas: whether you find known boring
 ideas intolerable.  Could you endure studying literary theory, or
 working in middle management at a large company?[10]
 In fact, if your goal is to start a startup, you can stick
 even more closely to the ideal of a liberal education than past
 generations have. Back when students focused mainly on getting a
 job after college, they thought at least a little about how the
 courses they took might look to an employer.  And perhaps even
 worse, they might shy away from taking a difficult class lest they
 get a low grade, which would harm their all-important GPA.  Good
 news: users don't care what your GPA
 was.  And I've never heard of investors caring either.  Y Combinator
 certainly never asks what classes you took in college or what grades
 you got in them.
 Thanks to Sam Altman, Paul Buchheit, John Collison, Patrick
 Collison, Jessica Livingston, Robert Morris, Geoff Ralston, and
 Fred Wilson for reading drafts of this.
--- a/docs/prompt
+++ b/docs/prompt
@ -1 +1,222 @@
-TODO
+# Example App for text summarization & QA using minillmflow
 from minillmflow import Node, BatchNode, Flow, BatchFlow, AsyncNode, AsyncFlow, BatchAsyncFlow
 import os
 # 1) Implement a simple LLM helper (OpenAI in this example).
 def call_LLM(prompt):
    """Send `prompt` as a single user message to the chat model and return the reply.

    Args:
        prompt: The full prompt text to send.

    Returns:
        The message content of the first choice in the completion response.
    """
    # NOTE(review): `openai` is referenced here but no `import openai` is
    # visible in this file; confirm it is imported wherever this is run.
    # Users must set an OpenAI API key; can also load from env var, etc.
    openai.api_key = "YOUR_API_KEY_HERE"
    r = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content
 # The nodes in this example call `call_llm` (lowercase). Without this alias
 # those calls raise NameError, because only `call_LLM` is defined.
 call_llm = call_LLM
 # 2) Create a shared store (dict) for Node/Flow data exchange.
 #    This can be replaced with a DB or other storage.
 #    Design the structure / schema based on the app requirements.
 shared = {"data": {}, "summary": {}}
 # 3) Create a Node that loads data from disk into shared['data'].
 class LoadData(Node):
    """Node that reads every file in the essay directory into shared['data']."""

    def prep(self, shared):
        # All of the loading happens in prep(); each file's text is stored
        # under its filename.
        essay_dir = "../data/PaulGrahamEssaysLarge"
        for name in os.listdir(essay_dir):
            full_path = os.path.join(essay_dir, name)
            with open(full_path, 'r') as handle:
                shared['data'][name] = handle.read()

    def exec(self, shared, prep_res):
        # exec() is where an LLM call would go (kept idempotent so it can be
        # retried); loading needs none, so this step does nothing.
        return None

    def post(self, shared, prep_res, exec_res):
        # post() could update shared again or return an action naming the
        # next node; nothing is needed here.
        return None
 # Instantiate the loader node defined above.
 load_data = LoadData()
 # Run the data-loading node once so that shared['data'] maps each filename
 # to its file content.
 load_data.run(shared)
 # 4) Create a Node that summarizes a single file using the LLM.
 class SummarizeFile(Node):
    """Summarize the file named by params['filename'] with a single LLM call."""

    def prep(self, shared):
        # params must stay immutable during prep/exec/post; it typically holds
        # only identifying info such as the filename.
        return shared['data'][self.params['filename']]

    def exec(self, shared, prep_res):
        # prep_res is the file content returned by prep().
        return call_llm(f"{prep_res} Respond a summary of above in 10 words")

    def post(self, shared, prep_res, exec_res):
        # Record the summary under the same filename key.
        summaries = shared["summary"]
        summaries[self.params['filename']] = exec_res
 summarize_file = SummarizeFile()
 # For testing, we set params directly on the node.
 # In real usage, you'd set them in a Flow or BatchFlow.
 summarize_file.set_params({"filename":"addiction.txt"})
 # After this run, shared["summary"]["addiction.txt"] holds the summary
 # written by SummarizeFile.post().
 summarize_file.run(shared)
 # 5) If data is large, we can apply a map-reduce pattern:
 #    - MapSummaries(BatchNode) => chunk the file and summarize each chunk
 #    - ReduceSummaries(Node)   => combine those chunk-level summaries
 class MapSummaries(BatchNode):
    """Map step: split one file into fixed-size chunks and summarize each chunk."""

    def prep(self, shared):
        text = shared['data'][self.params['filename']]
        chunk_size = 10000
        # A BatchNode's prep() must return an iterable (list or generator).
        pieces = []
        for start in range(0, len(text), chunk_size):
            pieces.append(text[start:start + chunk_size])
        return pieces

    def exec(self, shared, prep_res):
        # Here prep_res is a single chunk, not the whole list from prep().
        return call_llm(f"{prep_res} Respond a summary of above in 10 words")

    def post(self, shared, prep_res, exec_res):
        # exec_res is the list of per-chunk summaries; number them in order.
        numbered = []
        for idx, chunk_summary in enumerate(exec_res):
            numbered.append(f"{idx}. {chunk_summary}")
        shared["summary"][self.params['filename']] = numbered
 class ReduceSummaries(Node):
    """Reduce step: condense the stored chunk summaries into one final summary."""

    def prep(self, shared):
        # Fetch the list of chunk summaries kept under this file's name.
        per_file = shared["summary"]
        return per_file[self.params['filename']]

    def exec(self, shared, prep_res):
        # prep_res is interpolated into the prompt as-is.
        return call_llm(f"{prep_res} Respond a summary of above in 10 words")

    def post(self, shared, prep_res, exec_res):
        # Replace the chunk summaries with the single combined summary.
        per_file = shared["summary"]
        per_file[self.params['filename']] = exec_res
 map_summaries = MapSummaries()
 reduce_summaries = ReduceSummaries()
 # Link map_summaries to reduce_summaries with an action.
 # The action defaults to "default" (taken when post() returns None),
 # so this is the same as: map_summaries - "default" >> reduce_summaries
 map_summaries >> reduce_summaries
 # We don't call map_summaries.run(shared) directly,
 # because that alone would run only the map step, not the reduce step.
 # 6) Instead, create a Flow that starts from map_summaries (a Node)
 #    and automatically includes reduce_summaries.
 #    Note: A Flow can also start from any other Flow or BatchFlow.
 file_summary_flow = Flow(start=map_summaries)
 # Params set on a flow are passed on to every node the flow runs.
 file_summary_flow.set_params({"filename":"before.txt"})
 file_summary_flow.run(shared)
 # 7) Summarize all files using a BatchFlow that reruns file_summary_flow for each file
 class SummarizeAllFiles(BatchFlow):
    """BatchFlow that reruns the wrapped flow once for every loaded file."""

    def prep(self, shared):
        # Build one params dict per flow iteration. Each one is merged with
        # this node's own params, which allows nesting BatchFlows on several
        # levels (e.g., first-level directory, second-level file).
        param_sets = []
        for filename in shared['data']:
            param_sets.append({"filename": filename})
        return param_sets
 # Wrap the per-file map/reduce flow so it is executed once per filename
 # returned by SummarizeAllFiles.prep().
 summarize_all_files = SummarizeAllFiles(start=file_summary_flow)
 summarize_all_files.run(shared)
 # 8) QA Agent: Find the most relevant file based on summary with actions
 #    if no question is asked:
 #       (a) end: terminate the flow 
 #    if question is asked:
 #         if relevant file is found:
 #            (b) answer: move to answer node and read the whole file to answer the question
 #         if no relevant file is found:
 #            (c) retry: retry the process to find the relevant file
 class FindRelevantFile(Node):
    """Ask the user for a question and let the LLM pick the most relevant file.

    post() returns one of the actions "end", "answer" or "retry".
    """
    def prep(self, shared):
        """Read a question from stdin and list every file with its summary."""
        question = input("Enter a question: ")
        formatted_list = [f"- '{filename}': {shared['summary'][filename]}" 
                         for filename in shared['summary']]
        return question, formatted_list
    def exec(self, shared, prep_res):
        """Query the LLM and return its parsed YAML verdict as a dict."""
        question, formatted_list = prep_res
        # An empty question skips the LLM call entirely.
        if not question:
            return {"think":"no question", "has_relevant":False}
        # Provide a structured YAML output that includes:
        # - The chain of thought
        # - Whether any relevant file was found
        # - The most relevant file if found
        # Note: formatted_list is a Python list and is interpolated as such.
        prompt = f"""Question: {question} 
 Find the most relevant file from: 
 {formatted_list}
 If no relevant file, explain why
 Respond in yaml without additional information:
 think: the question has/has no relevant file ...
 has_relevant: true/false
 most_relevant: filename"""
        response = call_llm(prompt)
        # Imported here, so yaml is only needed once a response must be parsed.
        import yaml
        result = yaml.safe_load(response)
        # Ensure required fields are present
        # NOTE(review): a failed assert raises here; presumably that is what
        # triggers the node's retry handling -- confirm against Node.
        assert "think" in result
        assert "has_relevant" in result
        # "most_relevant" is only required when has_relevant is truthy.
        assert "most_relevant" in result if result["has_relevant"] else True
        return result
    # handle errors by returning a default response in case of exception after retries
    def process_after_fail(self,shared,prep_res,exc):
        """Return a "no relevant file" result instead of raising `exc`."""
        # if not overridden, the default is to throw the exception
        return {"think":"error finding the file", "has_relevant":False}
    def post(self, shared, prep_res, exec_res):
        """Store the outcome in shared and return the next action name."""
        question, _ = prep_res
        # Decide what to do next based on the results
        if not question:
            print(f"No question asked")
            return "end"
        if exec_res["has_relevant"]:
            # Store the question and most relevant file in shared
            shared["question"] = question
            shared["relevant_file"] = exec_res['most_relevant']
            print(f"Relevant file found: {exec_res['most_relevant']}")
            return "answer"
        else:
            # Report the model's explanation and signal another attempt.
            print(f"No relevant file found: {exec_res['think']}")
            return "retry"
 class AnswerQuestion(Node):
    """Answer the stored question using the full text of the selected file."""

    def prep(self, shared):
        # Both keys are read from the shared store.
        question = shared['question']
        chosen = shared['relevant_file']
        # Use the complete file content rather than its summary.
        return question, shared['data'][chosen]

    def exec(self, shared, prep_res):
        question, file_content = prep_res
        return call_llm(f"""Question: {question}
 File: {file_content}
 Answer the question in 50 words""")

    def post(self, shared, prep_res, exec_res):
        # Show the answer to the user; no action is returned.
        print(f"Answer: {exec_res}")
 class NoOp(Node):
    """Node that adds no behavior of its own on top of Node."""
 # Configure the QA agent with appropriate transitions and retries
 find_relevant_file = FindRelevantFile(max_retries=3)
 answer_question = AnswerQuestion()
 no_op = NoOp()
 # Connect the nodes based on the actions they return.
 # `-` binds tighter than `>>`, so the first line reads as
 # (find_relevant_file - "answer") >> answer_question, and then
 # answer_question >> find_relevant_file loops back for the next question.
 find_relevant_file - "answer" >> answer_question >> find_relevant_file
 # "retry" routes the node back to itself, asking for a question again.
 find_relevant_file - "retry" >> find_relevant_file
 # "end" hands off to a node with no successors, which ends the flow.
 find_relevant_file - "end" >> no_op
 qa_agent = Flow(start=find_relevant_file)
 qa_agent.run(shared)
 # The example above demonstrates how to use minillmflow.
 # Next, build another app based on the same principles:
 # First, given the app's requirements, design the Node/Flow structure.
 # Then, design the data structure within shared storage and how it is updated.
 # Finally, implement the Nodes and Flows to achieve the desired functionality.
--- a/minillmflow/init.py
+++ b/minillmflow/init.py
@ -3,9 +3,9 @@ import asyncio, warnings
 class BaseNode:
    def __init__(self): self.params,self.successors={},{}
    def set_params(self,params): self.params=params
-    def add_successor(self,node,cond="default"):
+    def add_successor(self,node,action="default"):
-        if cond in self.successors: warnings.warn(f"Overwriting successor for condition '{cond}'")
+        if action in self.successors: warnings.warn(f"Overwriting successor for action '{action}'")
-        self.successors[cond]=node;return node
+        self.successors[action]=node;return node
    def prep(self,shared): return None
    def exec(self,shared,prep_res): return None
    def _exec(self,shared,prep_res): return self.exec(shared,prep_res)
@ -18,13 +18,13 @@ class BaseNode:
        if self.successors: warnings.warn("Node won't run successors. Use a parent Flow instead.")
        return self._run(shared)
    def __rshift__(self,other): return self.add_successor(other)
-    def __sub__(self,cond):
+    def __sub__(self,action):
-        if isinstance(cond,str): return _ConditionalTransition(self,cond)
+        if isinstance(action,str): return _ConditionalTransition(self,action)
-        raise TypeError("Condition must be a string")
+        raise TypeError("Action must be a string")
 class _ConditionalTransition:
-    def __init__(self,src,cond): self.src,self.cond=src,cond
+    def __init__(self,src,action): self.src,self.action=src,action
-    def __rshift__(self,tgt): return self.src.add_successor(tgt,self.cond)
+    def __rshift__(self,tgt): return self.src.add_successor(tgt,self.action)
 class Node(BaseNode):
    def __init__(self,max_retries=1): 
@ -42,16 +42,16 @@ class BatchNode(Node):
    def _exec(self,shared,items): return [super(Node,self)._exec(shared,i) for i in items]
 class Flow(BaseNode):
-    def __init__(self,start_node):
+    def __init__(self,start):
        super().__init__()
-        self.start_node=start_node
+        self.start=start
-    def get_next_node(self,curr,cond):
+    def get_next_node(self,curr,action):
-        nxt=curr.successors.get(cond if cond is not None else "default")
+        nxt=curr.successors.get(action if action is not None else "default")
        if not nxt and curr.successors: 
-            warnings.warn(f"Flow ends: condition '{cond}' not found in {list(curr.successors)}")
+            warnings.warn(f"Flow ends: action '{action}' not found in {list(curr.successors)}")
        return nxt
    def _exec(self,shared,params=None):
-        curr,p=self.start_node,(params if params else {**self.params})
+        curr,p=self.start,(params if params else {**self.params})
        while curr:
            curr.set_params(p)
            c=curr._run(shared)
@ -83,7 +83,7 @@ class AsyncNode(Node):
 class AsyncFlow(Flow,AsyncNode):
    async def _exec_async(self,shared,params=None):
-        curr,p=self.start_node,(params if params else {**self.params})
+        curr,p=self.start,(params if params else {**self.params})
        while curr:
            curr.set_params(p)
            c=await curr._run_async(shared) if hasattr(curr,"run_async") else curr._run(shared)
--- a/setup.py
+++ b/setup.py
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
    name="minillmflow",
-    version="0.0.0",
+    version="0.0.2",
    packages=find_packages(),
    author="Zachary Huang",
    author_email="zh2408@columbia.edu",
--- a/tests/test_async_batch_flow.py
+++ b/tests/test_async_batch_flow.py
@ -46,7 +46,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
            }
        }
-        flow = SimpleTestAsyncBatchFlow(start_node=self.process_node)
+        flow = SimpleTestAsyncBatchFlow(start=self.process_node)
        asyncio.run(flow.run_async(shared_storage))
        expected_results = {
@ -66,7 +66,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
            'input_data': {}
        }
-        flow = EmptyTestAsyncBatchFlow(start_node=self.process_node)
+        flow = EmptyTestAsyncBatchFlow(start=self.process_node)
        asyncio.run(flow.run_async(shared_storage))
        self.assertEqual(shared_storage.get('results', {}), {})
@ -85,7 +85,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
            }
        }
-        flow = ErrorTestAsyncBatchFlow(start_node=AsyncErrorNode())
+        flow = ErrorTestAsyncBatchFlow(start=AsyncErrorNode())
        with self.assertRaises(ValueError):
            asyncio.run(flow.run_async(shared_storage))
@ -126,7 +126,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
            }
        }
-        flow = NestedAsyncBatchFlow(start_node=inner_node)
+        flow = NestedAsyncBatchFlow(start=inner_node)
        asyncio.run(flow.run_async(shared_storage))
        expected_results = {
@ -162,7 +162,7 @@ class TestAsyncBatchFlow(unittest.TestCase):
            }
        }
-        flow = CustomParamAsyncBatchFlow(start_node=CustomParamAsyncNode())
+        flow = CustomParamAsyncBatchFlow(start=CustomParamAsyncNode())
        asyncio.run(flow.run_async(shared_storage))
        expected_results = {
--- a/tests/test_async_flow.py
+++ b/tests/test_async_flow.py
@ -86,14 +86,14 @@ class TestAsyncFlow(unittest.TestCase):
        """
        # Create our nodes
-        start_node = AsyncNumberNode(5)
+        start = AsyncNumberNode(5)
        inc_node = AsyncIncrementNode()
-        # Chain them: start_node >> inc_node
+        # Chain them: start >> inc_node
-        start_node - "number_set" >> inc_node
+        start - "number_set" >> inc_node
-        # Create an AsyncFlow with start_node
+        # Create an AsyncFlow with start
-        flow = AsyncFlow(start_node)
+        flow = AsyncFlow(start)
        # We'll run the flow synchronously (which under the hood is asyncio.run())
        shared_storage = {}
@ -135,15 +135,15 @@ class TestAsyncFlow(unittest.TestCase):
        shared_storage = {"value": 10}
-        start_node = BranchingAsyncNode()
+        start = BranchingAsyncNode()
        positive_node = PositiveNode()
        negative_node = NegativeNode()
        # Condition-based chaining
-        start_node - "positive_branch" >> positive_node
+        start - "positive_branch" >> positive_node
-        start_node - "negative_branch" >> negative_node
+        start - "negative_branch" >> negative_node
-        flow = AsyncFlow(start_node)
+        flow = AsyncFlow(start)
        asyncio.run(flow.run_async(shared_storage))
        self.assertEqual(shared_storage["path"], "positive", 
--- a/tests/test_batch_flow.py
+++ b/tests/test_batch_flow.py
@ -40,7 +40,7 @@ class TestBatchFlow(unittest.TestCase):
            }
        }
-        flow = SimpleTestBatchFlow(start_node=self.process_node)
+        flow = SimpleTestBatchFlow(start=self.process_node)
        flow.run(shared_storage)
        expected_results = {
@ -60,7 +60,7 @@ class TestBatchFlow(unittest.TestCase):
            'input_data': {}
        }
-        flow = EmptyTestBatchFlow(start_node=self.process_node)
+        flow = EmptyTestBatchFlow(start=self.process_node)
        flow.run(shared_storage)
        self.assertEqual(shared_storage.get('results', {}), {})
@ -77,7 +77,7 @@ class TestBatchFlow(unittest.TestCase):
            }
        }
-        flow = SingleItemBatchFlow(start_node=self.process_node)
+        flow = SingleItemBatchFlow(start=self.process_node)
        flow.run(shared_storage)
        expected_results = {
@ -99,7 +99,7 @@ class TestBatchFlow(unittest.TestCase):
            }
        }
-        flow = ErrorTestBatchFlow(start_node=ErrorProcessNode())
+        flow = ErrorTestBatchFlow(start=ErrorProcessNode())
        with self.assertRaises(ValueError):
            flow.run(shared_storage)
@ -136,7 +136,7 @@ class TestBatchFlow(unittest.TestCase):
            }
        }
-        flow = NestedBatchFlow(start_node=inner_node)
+        flow = NestedBatchFlow(start=inner_node)
        flow.run(shared_storage)
        expected_results = {
@ -170,7 +170,7 @@ class TestBatchFlow(unittest.TestCase):
            }
        }
-        flow = CustomParamBatchFlow(start_node=CustomParamNode())
+        flow = CustomParamBatchFlow(start=CustomParamNode())
        flow.run(shared_storage)
        expected_results = {
--- a/tests/test_batch_node.py
+++ b/tests/test_batch_node.py
@ -74,7 +74,7 @@ class TestBatchNode(unittest.TestCase):
        chunk_node >> reduce_node
        # Create and run pipeline
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['total'], expected_sum)
@ -95,7 +95,7 @@ class TestBatchNode(unittest.TestCase):
        reduce_node = SumReduceNode()
        chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['total'], expected_sum)
@ -116,7 +116,7 @@ class TestBatchNode(unittest.TestCase):
        reduce_node = SumReduceNode()
        chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['total'], expected_sum)
@ -136,7 +136,7 @@ class TestBatchNode(unittest.TestCase):
        reduce_node = SumReduceNode()
        chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['total'], expected_sum)
@ -153,7 +153,7 @@ class TestBatchNode(unittest.TestCase):
        reduce_node = SumReduceNode()
        chunk_node >> reduce_node
-        pipeline = Flow(start_node=chunk_node)
+        pipeline = Flow(start=chunk_node)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['total'], 0)
--- a/tests/test_flow_basic.py
+++ b/tests/test_flow_basic.py
@ -45,7 +45,7 @@ class TestNode(unittest.TestCase):
    def test_single_number(self):
        shared_storage = {}
        start = NumberNode(5)
-        pipeline = Flow(start_node=start)
+        pipeline = Flow(start=start)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['current'], 5)
@ -65,7 +65,7 @@ class TestNode(unittest.TestCase):
        # Chain them in sequence using the >> operator
        n1 >> n2 >> n3
-        pipeline = Flow(start_node=n1)
+        pipeline = Flow(start=n1)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['current'], 16)
@ -94,7 +94,7 @@ class TestNode(unittest.TestCase):
        check - "positive" >> add_if_positive
        check - "negative" >> add_if_negative
-        pipeline = Flow(start_node=start)
+        pipeline = Flow(start=start)
        pipeline.run(shared_storage)
        self.assertEqual(shared_storage['current'], 15)
@ -118,7 +118,7 @@ class TestNode(unittest.TestCase):
        check - "positive" >> add_if_positive
        check - "negative" >> add_if_negative
-        pipeline = Flow(start_node=start)
+        pipeline = Flow(start=start)
        pipeline.run(shared_storage)
        # Should have gone down the 'negative' branch
@ -145,7 +145,7 @@ class TestNode(unittest.TestCase):
        # Attach a no-op node on the negative branch to avoid warning
        check - 'negative' >> no_op
-        pipeline = Flow(start_node=n1)
+        pipeline = Flow(start=n1)
        pipeline.run(shared_storage)
        # final result should be -2: (10 -> 7 -> 4 -> 1 -> -2)
--- a/tests/test_flow_composition.py
+++ b/tests/test_flow_composition.py
@ -35,21 +35,21 @@ class TestFlowComposition(unittest.TestCase):
    def test_flow_as_node(self):
        """
        1) Create a Flow (f1) starting with NumberNode(5), then AddNode(10), then MultiplyNode(2).
-        2) Create a second Flow (f2) whose start_node is f1.
+        2) Create a second Flow (f2) whose start is f1.
        3) Create a wrapper Flow (f3) that contains f2 to ensure proper execution.
        Expected final result in shared_storage['current']: (5 + 10) * 2 = 30.
        """
        shared_storage = {}
        # Inner flow f1
-        f1 = Flow(start_node=NumberNode(5))
+        f1 = Flow(start=NumberNode(5))
        f1 >> AddNode(10) >> MultiplyNode(2)
        # f2 starts with f1
-        f2 = Flow(start_node=f1)
+        f2 = Flow(start=f1)
        # Wrapper flow f3 to ensure proper execution
-        f3 = Flow(start_node=f2)
+        f3 = Flow(start=f2)
        f3.run(shared_storage)
        self.assertEqual(shared_storage['current'], 30)
@ -65,15 +65,15 @@ class TestFlowComposition(unittest.TestCase):
        shared_storage = {}
        # Build the inner flow
-        inner_flow = Flow(start_node=NumberNode(5))
+        inner_flow = Flow(start=NumberNode(5))
        inner_flow >> AddNode(3)
-        # Build the middle flow, whose start_node is the inner flow
+        # Build the middle flow, whose start is the inner flow
-        middle_flow = Flow(start_node=inner_flow)
+        middle_flow = Flow(start=inner_flow)
        middle_flow >> MultiplyNode(4)
        # Wrapper flow to ensure proper execution
-        wrapper_flow = Flow(start_node=middle_flow)
+        wrapper_flow = Flow(start=middle_flow)
        wrapper_flow.run(shared_storage)
        self.assertEqual(shared_storage['current'], 32)
@ -91,16 +91,16 @@ class TestFlowComposition(unittest.TestCase):
        # flow1
        numbernode = NumberNode(10)
        numbernode >> AddNode(10)
-        flow1 = Flow(start_node=numbernode)
+        flow1 = Flow(start=numbernode)
        # flow2
-        flow2 = Flow(start_node=MultiplyNode(2))
+        flow2 = Flow(start=MultiplyNode(2))
        # Chain flow1 to flow2
        flow1 >> flow2
        # Wrapper flow to ensure proper execution
-        wrapper_flow = Flow(start_node=flow1)
+        wrapper_flow = Flow(start=flow1)
        wrapper_flow.run(shared_storage)
        self.assertEqual(shared_storage['current'], 40)