\documentclass[11pt]{article}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\usepackage{lmodern}
\usepackage[margin=1in]{geometry}
\usepackage[pdftex]{graphicx, color}
\usepackage{mathtools}
\usepackage{listings}
\usepackage[pdfusetitle]{hyperref}
\usepackage{forest}

\newcommand\tab[1][1cm]{\hspace*{#1}}
\usepackage{array}
\newcommand{\PreserveBackslash}[1]{\let\temp=\\#1\let\\=\temp}
\newcolumntype{C}[1]{>{\PreserveBackslash\centering}p{#1}}
\newcolumntype{R}[1]{>{\PreserveBackslash\raggedleft}p{#1}}
\newcolumntype{L}[1]{>{\PreserveBackslash\raggedright}p{#1}}

\usepackage{tikz}
\usetikzlibrary{automata,positioning}

\let\epsilon\varepsilon
\tikzset{shorten >=1pt, node distance=2cm, on grid, baseline={([yshift=-8pt] current bounding box.north)}}


\lstset{basicstyle=\small\ttfamily,breaklines=true}

\title{CS143 Spring 2026 -- Written Assignment 2 }

\begin{document}
\begin{center}
\LARGE CS143 Spring 2026 -- Written Assignment 2 \\
{\large Due Monday, April 27, 2026 11:59 PM PDT}
\end{center}

This assignment covers context free grammars and parsing. You may discuss this assignment with other students and work on the problems together. However, your write-up should be your own individual work, and you should indicate in your submission who you worked with, if applicable. Assignments can be submitted electronically through Gradescope as a \textsc{pdf} by 11:59 \textsc{pm pdt}. Please review the the course policies for more information: \url{https://web.stanford.edu/class/cs143/policies/}. A \LaTeX{} template for writing your solutions is available on the course website.
If you need to draw parse trees in \LaTeX{}, you may use the {\tt forest} package: \url{https://ctan.org/pkg/forest}.

\bigskip

\begin{enumerate}
% Problem 1
\item  Give a context-free grammar (CFG) for each of the following languages. Any grammar is acceptable---including ambiguous grammars---as long as it has the correct language. The start symbol should be $S$.
  \begin{enumerate}
    \item The set of all strings over the alphabet $\{2, -, + \}$ representing valid arithmetic expressions where each integer in the expression is a single digit and the expression evaluates to some value $\geq$ 0. Note that the empty string $\epsilon$ is \emph{not} in the language. \\ 
    Example Strings in the Language: 
    \begin{center}
     2+2 \tab \tab 2-2+2 \tab \tab -2-2+2+2   
    \end{center}
    Strings not in the Language: 
    \begin{center}
    -2  \tab \tab -2+22 \tab \tab 2++2-2 
    \end{center}
    \textbf{Solution:} 
    %% Your answer here
  
  
  \newpage
    \item The set of all strings over the alphabet $\{0, 1\}$ with exactly one more 0 than 1's.


    Example Strings in the Language:
    \[
        0 \qquad 100 \qquad 010 \qquad 0010011
    \]

    Strings not in the Language:
    \[
        \epsilon \qquad 00 \qquad 01 \qquad 0110 \qquad 0100
    \]

    \textbf{Solution:}
    
  
  \newpage
  \item The set of all strings over the alphabet $\{0,1\}$ in the language $L:\{0^i 1^j 0^k \mid j = i + k\}$.

    Example Strings in the Language:
    \begin{center}
      $\epsilon$ \tab \tab 10 \tab \tab 01 \tab \tab 0110  \tab \tab 000111
    \end{center}
    Strings not in the Language:
    \begin{center}
      0 \tab \tab 00 \tab \tab 001 \tab \tab 0010 \tab \tab 01110
    \end{center}

    \textbf{Solution}:\\
   
  
  \newpage
    \item The set of all strings over the alphabet $\{[,],\{,\},,\}$ which are sets. We define a set to be a collection of zero or more comma-separated arrays enclosed in an open brace and a close brace. Similarly, we define an array to be a collection of zero or more comma-separated sets enclosed in an open bracket and a close bracket. \textbf{Note} that "," (comma) is in the alphabet.\\
    Example Arrays:
    \begin{center}
    $[\{\},\{\}]$\tab $[]$ \tab $[\{[],[]\}]$   
    \end{center}
    Example Sets:
    \begin{center}
    $\{[]\}$ \tab $\{\}$ \tab $\{[\{\}], []\}$  
    \end{center}
    Example Strings in the Language: 
    \begin{center}
    $\{\}$\tab $\{[],[\{[]\}]\}$\tab $\{[\{\}, \{\}, \{\}],[]\}$   
    \end{center}
    Strings not in the Language: 
    \begin{center}
    []  \tab \tab $\{\{\}\}$ \tab \tab $\{[[]]\}$
    \end{center}

    \textbf{Solution:} 
 
    %% Your answer here
    \end{enumerate}

  \newpage

% Problem 2
\item Consider the following grammar for if-then-else expressions that involve a variable $x$:
  \begin{align*}
    E &\to \text{if} \ x \ \text{then} \ E \mid \mathit{M} \\
    \mathit{M} &\to \text{if} \ x \ \text{then} \ \mathit{M} \ \text{else} \ E \mid x
  \end{align*}
  Is this grammar ambiguous or not?
  If yes, give an example of an expression with two different parse trees and draw the two parse trees.
  If not, explain why that is the case.

  \textbf{Solution}:
 
  
  \newpage

% Problem 3
\item
\begin{enumerate}
      \item Left factor the following grammar:
        \begin{equation*}
        \begin{split}
          S &\to I \mid I-J \mid I+K  \\
          I &\to (J-K) \mid (J)  \\
          J &\to K1 \mid K2 \\
          K &\to K3 \mid \epsilon 
        \end{split}
      \end{equation*}
    \textbf{Solution:} 
    
    
    \newpage
      \item Eliminate left recursion from the following grammar:
        \begin{equation*}
        \begin{split}
          S &\to STS \mid ST \mid T  \\
          T &\to Ta \mid Tb \mid U\\
          U &\to T \mid c \\
        \end{split}
      \end{equation*}
  \end{enumerate}
  \textbf{Solution:} 

    
 

  \newpage

% Problem 4
\item Consider the following CFG, where the set of terminals is $\{a, b, c, d, \# , ?\}$:
  \begin{equation*}
    \begin{split}
      S &\to \# U T \mid T ? \\
      T &\to aS \mid bUc \mid \epsilon \\
      U &\to aSc \mid bTd \\
    \end{split}
  \end{equation*}

  \begin{enumerate}
  \item Construct the FIRST sets for each of the nonterminals.
    
    \textbf{Solution}:
    \begin{itemize}
    \item $S: $
    \item $T: $
    \item $U: $
    \end{itemize}
    
  \item Construct the FOLLOW sets for each of the nonterminals.

    \textbf{Solution}:
    \begin{itemize}
    \item $S: $
    \item $T: $
    \item $U: $
    \end{itemize}

  \item Construct the LL(1) parsing table for the grammar.

    \textbf{Solution}:
    \begin{center}
      \begin{tabular}{c|C{3em}|C{3em}|C{3em}|C{3em}|C{3em}|C{3em}|C{3em}}
        Nonterminal & $a$ & $b$ & $c$ & $d$ & $\#$ & $?$ & $\$$ \\
        \hline
        $S$ & & & & & & \\
        \hline
        $T$ & & & & & & & \\
        \hline
        $U$ & & & & & &
      \end{tabular}
    \end{center}
    
  \item Show the sequence of stack, input and action configurations that occur during an LL(1) parse of the string ``$\# a?ca?$''. At the beginning of the parse, the stack should contain a single $S$.

    \textbf{Solution}:
    \begin{center}
      \begin{tabular}{R{10em}|R{10em}|L{10em}}
        Stack & Input & Action \\
        \hline
        \rule{0pt}{19em} & & \\
       
      \end{tabular}
    \end{center}
  \end{enumerate}

  \newpage

% Problem 5 
\item Consider the following grammar $G$ over the alphabet $\{a,b,c\}$:
  \begin{equation*}
    \begin{split}
      S'&\to S \\
      S &\to Aa \\
      S &\to Bb \\
      A &\to Ac \\
      A &\to \epsilon\\
      B &\to Bc \\
      B &\to \epsilon
    \end{split}
  \end{equation*}
  You want to implement $G$ using an SLR(1) parser. Note that we have already added the $S' \to S$ production for you.
  \begin{enumerate}
  \item Construct the first state of the LR(0) machine, compute the FOLLOW sets of A and B, and point out the conflicts that prevent the grammar from being SLR(1).

    \textbf{Solution}:
    \newpage

  \item Show modifications to production 4 ($A \to Ac$) and production 6 ($B \to Bc$) to make the grammar SLR(1) while having the same language as the original grammar $G$. Explain the intuition behind this result.

    \textbf{Solution}:

  \end{enumerate} 

\end{enumerate}
\end{document}
